summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig13
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd_iommu.c417
-rw-r--r--drivers/iommu/amd_iommu.h96
-rw-r--r--drivers/iommu/amd_iommu_debugfs.c5
-rw-r--r--drivers/iommu/amd_iommu_init.c15
-rw-r--r--drivers/iommu/amd_iommu_proto.h96
-rw-r--r--drivers/iommu/amd_iommu_types.h17
-rw-r--r--drivers/iommu/amd_iommu_v2.c14
-rw-r--r--drivers/iommu/arm-smmu-v3.c20
-rw-r--r--drivers/iommu/dmar.c99
-rw-r--r--drivers/iommu/hyperv-iommu.c2
-rw-r--r--drivers/iommu/intel-iommu-debugfs.c62
-rw-r--r--drivers/iommu/intel-iommu.c892
-rw-r--r--drivers/iommu/intel-pasid.c309
-rw-r--r--drivers/iommu/intel-pasid.h27
-rw-r--r--drivers/iommu/intel-svm.c448
-rw-r--r--drivers/iommu/intel_irq_remapping.c2
-rw-r--r--drivers/iommu/iommu.c149
-rw-r--r--drivers/iommu/iova.c6
-rw-r--r--drivers/iommu/ipmmu-vmsa.c7
-rw-r--r--drivers/iommu/msm_iommu.c2
-rw-r--r--drivers/iommu/mtk_iommu_v1.c18
-rw-r--r--drivers/iommu/omap-iommu.c4
-rw-r--r--drivers/iommu/qcom_iommu.c5
-rw-r--r--drivers/iommu/sun50i-iommu.c1023
-rw-r--r--drivers/iommu/virtio-iommu.c2
27 files changed, 2668 insertions, 1083 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 58b4a4dbfc78..aca76383f201 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -303,6 +303,15 @@ config ROCKCHIP_IOMMU
Say Y here if you are using a Rockchip SoC that includes an IOMMU
device.
+config SUN50I_IOMMU
+ bool "Allwinner H6 IOMMU Support"
+ depends on ARCH_SUNXI || COMPILE_TEST
+ select ARM_DMA_USE_IOMMU
+ select IOMMU_API
+ select IOMMU_DMA
+ help
+ Support for the IOMMU introduced in the Allwinner H6 SoCs.
+
config TEGRA_IOMMU_GART
bool "Tegra GART IOMMU Support"
depends on ARCH_TEGRA_2x_SOC
@@ -362,7 +371,7 @@ config IPMMU_VMSA
config SPAPR_TCE_IOMMU
bool "sPAPR TCE IOMMU Support"
- depends on PPC_POWERNV || PPC_PSERIES || (PPC && COMPILE_TEST)
+ depends on PPC_POWERNV || PPC_PSERIES
select IOMMU_API
help
Enables bits of IOMMU API required by VFIO. The iommu_ops
@@ -457,7 +466,7 @@ config S390_AP_IOMMU
config MTK_IOMMU
bool "MTK IOMMU Support"
- depends on ARM || ARM64 || COMPILE_TEST
+ depends on HAS_DMA
depends on ARCH_MEDIATEK || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 9f33fdb3bb05..57cf4ba5e27c 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o
obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
+obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o
obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c30367413683..311ef7105c6d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -22,7 +22,6 @@
#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/iommu-helper.h>
-#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
@@ -43,8 +42,7 @@
#include <asm/gart.h>
#include <asm/dma.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
+#include "amd_iommu.h"
#include "irq_remapping.h"
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
@@ -71,6 +69,8 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
+#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
+
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -99,8 +99,9 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
-static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -125,7 +126,8 @@ static inline int get_acpihid_device_id(struct device *dev,
return -ENODEV;
list_for_each_entry(p, &acpihid_map, list) {
- if (acpi_dev_hid_uid_match(adev, p->hid, p->uid)) {
+ if (acpi_dev_hid_uid_match(adev, p->hid,
+ p->uid[0] ? p->uid : NULL)) {
if (entry)
*entry = p;
return p->devid;
@@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ u64 pt_root = atomic64_read(&domain->pt_root);
+
+ pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+ pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ return pt_root;
+}
+
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -257,12 +279,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
}
-struct iommu_dev_data *get_dev_data(struct device *dev)
-{
- return dev->archdata.iommu;
-}
-EXPORT_SYMBOL(get_dev_data);
-
/*
* Find or create an IOMMU group for a acpihid device.
*/
@@ -291,16 +307,15 @@ static struct iommu_group *acpihid_device_group(struct device *dev)
static bool pci_iommuv2_capable(struct pci_dev *pdev)
{
static const int caps[] = {
- PCI_EXT_CAP_ID_ATS,
PCI_EXT_CAP_ID_PRI,
PCI_EXT_CAP_ID_PASID,
};
int i, pos;
- if (pci_ats_disabled())
+ if (!pci_ats_supported(pdev))
return false;
- for (i = 0; i < 3; ++i) {
+ for (i = 0; i < 2; ++i) {
pos = pci_find_ext_capability(pdev, caps[i]);
if (pos == 0)
return false;
@@ -313,7 +328,7 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
{
struct iommu_dev_data *dev_data;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
return dev_data->errata & (1 << erratum) ? true : false;
}
@@ -348,7 +363,7 @@ static int iommu_init_device(struct device *dev)
struct iommu_dev_data *dev_data;
int devid;
- if (dev->archdata.iommu)
+ if (dev_iommu_priv_get(dev))
return 0;
devid = get_device_id(dev);
@@ -375,7 +390,7 @@ static int iommu_init_device(struct device *dev)
dev_data->iommu_v2 = iommu->is_iommu_v2;
}
- dev->archdata.iommu = dev_data;
+ dev_iommu_priv_set(dev, dev_data);
return 0;
}
@@ -397,22 +412,16 @@ static void iommu_ignore_device(struct device *dev)
static void amd_iommu_uninit_device(struct device *dev)
{
struct iommu_dev_data *dev_data;
- struct amd_iommu *iommu;
- int devid;
-
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
- iommu = amd_iommu_rlookup_table[devid];
- dev_data = search_dev_data(devid);
+ dev_data = dev_iommu_priv_get(dev);
if (!dev_data)
return;
if (dev_data->domain)
detach_device(dev);
+ dev_iommu_priv_set(dev, NULL);
+
/*
* We keep dev_data around for unplugged devices and reuse it when the
* device is re-plugged - not doing so would introduce a ton of races.
@@ -475,7 +484,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid),
devid & 0xff);
if (pdev)
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
@@ -1372,15 +1381,19 @@ static struct page *free_sub_pt(unsigned long root, int mode,
return freelist;
}
-static void free_pagetable(struct protection_domain *domain)
+static void free_pagetable(struct domain_pgtable *pgtable)
{
- unsigned long root = (unsigned long)domain->pt_root;
struct page *freelist = NULL;
+ unsigned long root;
- BUG_ON(domain->mode < PAGE_MODE_NONE ||
- domain->mode > PAGE_MODE_6_LEVEL);
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
+
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
- freelist = free_sub_pt(root, domain->mode, freelist);
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
free_page_list(freelist);
}
@@ -1394,24 +1407,39 @@ static bool increase_address_space(struct protection_domain *domain,
unsigned long address,
gfp_t gfp)
{
+ struct domain_pgtable pgtable;
unsigned long flags;
- bool ret = false;
- u64 *pte;
+ bool ret = true;
+ u64 *pte, root;
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(domain->mode) ||
- WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address <= PM_LEVEL_SIZE(pgtable.mode))
+ goto out;
+
+ ret = false;
+ if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
- *pte = PM_LEVEL_PDE(domain->mode,
- iommu_virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
+ *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+ pgtable.root = pte;
+ pgtable.mode += 1;
+ update_and_flush_device_table(domain, &pgtable);
+ domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode);
+ atomic64_set(&domain->pt_root, root);
ret = true;
@@ -1428,16 +1456,29 @@ static u64 *alloc_pte(struct protection_domain *domain,
gfp_t gfp,
bool *updated)
{
+ struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(domain->mode))
- *updated = increase_address_space(domain, address, gfp) || *updated;
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+
+ /* Read new values to check if update was successful */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ }
+
+
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -1513,16 +1554,19 @@ static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address,
unsigned long *page_size)
{
+ struct domain_pgtable pgtable;
int level;
u64 *pte;
*page_size = 0;
- if (address > PM_LEVEL_SIZE(domain->mode))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address > PM_LEVEL_SIZE(pgtable.mode))
return NULL;
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -1637,7 +1681,13 @@ out:
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- update_domain(dom);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ domain_flush_tlb_pde(dom);
+ domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -1756,78 +1806,18 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
-/*
- * Free a domain, only used if something went wrong in the
- * allocation path and we need to free an already allocated page table
- */
-static void dma_ops_domain_free(struct protection_domain *domain)
-{
- if (!domain)
- return;
-
- iommu_put_dma_cookie(&domain->domain);
-
- free_pagetable(domain);
-
- if (domain->id)
- domain_id_free(domain->id);
-
- kfree(domain);
-}
-
-/*
- * Allocates a new protection domain usable for the dma_ops functions.
- * It also initializes the page table and the address allocator data
- * structures required for the dma_ops interface
- */
-static struct protection_domain *dma_ops_domain_alloc(void)
-{
- struct protection_domain *domain;
-
- domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
- if (protection_domain_init(domain))
- goto free_domain;
-
- domain->mode = PAGE_MODE_3_LEVEL;
- domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- domain->flags = PD_DMA_OPS_MASK;
- if (!domain->pt_root)
- goto free_domain;
-
- if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
- goto free_domain;
-
- return domain;
-
-free_domain:
- dma_ops_domain_free(domain);
-
- return NULL;
-}
-
-/*
- * little helper function to check whether a given protection domain is a
- * dma_ops domain
- */
-static bool dma_ops_domain(struct protection_domain *domain)
-{
- return domain->flags & PD_DMA_OPS_MASK;
-}
-
static void set_dte_entry(u16 devid, struct protection_domain *domain,
+ struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (domain->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->pt_root);
+ if (pgtable->mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(pgtable->root);
- pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -1900,6 +1890,7 @@ static void clear_dte_entry(u16 devid)
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -1915,7 +1906,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ set_dte_entry(dev_data->devid, domain, &pgtable,
+ ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
device_flush_dte(dev_data);
@@ -2031,7 +2024,7 @@ static int attach_device(struct device *dev,
spin_lock_irqsave(&domain->lock, flags);
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
spin_lock(&dev_data->lock);
@@ -2095,7 +2088,7 @@ static void detach_device(struct device *dev)
struct iommu_dev_data *dev_data;
unsigned long flags;
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
domain = dev_data->domain;
spin_lock_irqsave(&domain->lock, flags);
@@ -2144,7 +2137,7 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
iommu = amd_iommu_rlookup_table[devid];
- if (get_dev_data(dev))
+ if (dev_iommu_priv_get(dev))
return &iommu->iommu;
ret = iommu_init_device(dev);
@@ -2174,16 +2167,12 @@ static void amd_iommu_probe_finalize(struct device *dev)
static void amd_iommu_release_device(struct device *dev)
{
+ int devid = get_device_id(dev);
struct amd_iommu *iommu;
- int devid;
if (!check_device(dev))
return;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
iommu = amd_iommu_rlookup_table[devid];
amd_iommu_uninit_device(dev);
@@ -2224,23 +2213,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
*
*****************************************************************************/
-static void update_device_table(struct protection_domain *domain)
+static void update_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
- dev_data->iommu_v2);
+ set_dte_entry(dev_data->devid, domain, pgtable,
+ dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ update_device_table(domain, pgtable);
+ domain_flush_devices(domain);
+}
+
static void update_domain(struct protection_domain *domain)
{
- update_device_table(domain);
+ struct domain_pgtable pgtable;
- domain_flush_devices(domain);
+ /* Update device table */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ update_and_flush_device_table(domain, &pgtable);
+
+ /* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
+ domain_flush_complete(domain);
}
int __init amd_iommu_init_api(void)
@@ -2308,27 +2310,46 @@ static void cleanup_domain(struct protection_domain *domain)
static void protection_domain_free(struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
+
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ atomic64_set(&domain->pt_root, 0);
+ free_pagetable(&pgtable);
+
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain)
+static int protection_domain_init(struct protection_domain *domain, int mode)
{
+ u64 *pt_root = NULL, root;
+
+ BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
+
spin_lock_init(&domain->lock);
domain->id = domain_id_alloc();
if (!domain->id)
return -ENOMEM;
INIT_LIST_HEAD(&domain->dev_list);
+ if (mode != PAGE_MODE_NONE) {
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root)
+ return -ENOMEM;
+ }
+
+ root = amd_iommu_domain_encode_pgtable(pt_root, mode);
+ atomic64_set(&domain->pt_root, root);
+
return 0;
}
-static struct protection_domain *protection_domain_alloc(void)
+static struct protection_domain *protection_domain_alloc(int mode)
{
struct protection_domain *domain;
@@ -2336,7 +2357,7 @@ static struct protection_domain *protection_domain_alloc(void)
if (!domain)
return NULL;
- if (protection_domain_init(domain))
+ if (protection_domain_init(domain, mode))
goto out_err;
return domain;
@@ -2349,45 +2370,30 @@ out_err:
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
- struct protection_domain *pdomain;
+ struct protection_domain *domain;
+ int mode = DEFAULT_PGTABLE_LEVEL;
- switch (type) {
- case IOMMU_DOMAIN_UNMANAGED:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ mode = PAGE_MODE_NONE;
- pdomain->mode = PAGE_MODE_3_LEVEL;
- pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pdomain->pt_root) {
- protection_domain_free(pdomain);
- return NULL;
- }
+ domain = protection_domain_alloc(mode);
+ if (!domain)
+ return NULL;
- pdomain->domain.geometry.aperture_start = 0;
- pdomain->domain.geometry.aperture_end = ~0ULL;
- pdomain->domain.geometry.force_aperture = true;
+ domain->domain.geometry.aperture_start = 0;
+ domain->domain.geometry.aperture_end = ~0ULL;
+ domain->domain.geometry.force_aperture = true;
- break;
- case IOMMU_DOMAIN_DMA:
- pdomain = dma_ops_domain_alloc();
- if (!pdomain) {
- pr_err("Failed to allocate\n");
- return NULL;
- }
- break;
- case IOMMU_DOMAIN_IDENTITY:
- pdomain = protection_domain_alloc();
- if (!pdomain)
- return NULL;
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
+ goto free_domain;
- pdomain->mode = PAGE_MODE_NONE;
- break;
- default:
- return NULL;
- }
+ return &domain->domain;
- return &pdomain->domain;
+free_domain:
+ protection_domain_free(domain);
+
+ return NULL;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
@@ -2404,27 +2410,19 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
if (!dom)
return;
- switch (dom->type) {
- case IOMMU_DOMAIN_DMA:
- /* Now release the domain */
- dma_ops_domain_free(domain);
- break;
- default:
- if (domain->mode != PAGE_MODE_NONE)
- free_pagetable(domain);
+ if (dom->type == IOMMU_DOMAIN_DMA)
+ iommu_put_dma_cookie(&domain->domain);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
- protection_domain_free(domain);
- break;
- }
+ protection_domain_free(domain);
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
struct amd_iommu *iommu;
int devid;
@@ -2462,7 +2460,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
if (!check_device(dev))
return -EINVAL;
- dev_data = dev->archdata.iommu;
+ dev_data = dev_iommu_priv_get(dev);
dev_data->defer_attach = false;
iommu = amd_iommu_rlookup_table[dev_data->devid];
@@ -2493,10 +2491,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
int prot = 0;
int ret;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -2516,8 +2516,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return 0;
return iommu_unmap_page(domain, iova, page_size);
@@ -2528,9 +2530,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long offset_mask, pte_pgsize;
+ struct domain_pgtable pgtable;
u64 *pte, __pte;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -2612,12 +2616,14 @@ static void amd_iommu_get_resv_regions(struct device *dev,
list_add_tail(&region->list, head);
}
-static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
- struct device *dev)
+bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
{
- struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
return dev_data->defer_attach;
}
+EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -2640,7 +2646,7 @@ static int amd_iommu_def_domain_type(struct device *dev)
{
struct iommu_dev_data *dev_data;
- dev_data = get_dev_data(dev);
+ dev_data = dev_iommu_priv_get(dev);
if (!dev_data)
return 0;
@@ -2699,18 +2705,22 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
+ /* First save pgtable configuration*/
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
/* Update data structure */
- domain->mode = PAGE_MODE_NONE;
+ atomic64_set(&domain->pt_root, 0);
/* Make changes visible to IOMMUs */
update_domain(domain);
/* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(domain);
+ free_pagetable(&pgtable);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -2899,9 +2909,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
static int __set_gcr3(struct protection_domain *domain, int pasid,
unsigned long cr3)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -2915,9 +2927,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
static int __clear_gcr3(struct protection_domain *domain, int pasid)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
@@ -2965,7 +2979,7 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
struct amd_iommu *iommu;
struct iommu_cmd cmd;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
iommu = amd_iommu_rlookup_table[dev_data->devid];
build_complete_ppr(&cmd, dev_data->devid, pasid, status,
@@ -2978,23 +2992,27 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr);
struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
{
struct protection_domain *pdomain;
- struct iommu_domain *io_domain;
+ struct iommu_dev_data *dev_data;
struct device *dev = &pdev->dev;
+ struct iommu_domain *io_domain;
if (!check_device(dev))
return NULL;
- pdomain = get_dev_data(dev)->domain;
- if (pdomain == NULL && get_dev_data(dev)->defer_attach) {
- get_dev_data(dev)->defer_attach = false;
- io_domain = iommu_get_domain_for_dev(dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ pdomain = dev_data->domain;
+ io_domain = iommu_get_domain_for_dev(dev);
+
+ if (pdomain == NULL && dev_data->defer_attach) {
+ dev_data->defer_attach = false;
pdomain = to_pdomain(io_domain);
attach_device(dev, pdomain);
}
+
if (pdomain == NULL)
return NULL;
- if (!dma_ops_domain(pdomain))
+ if (io_domain->type != IOMMU_DOMAIN_DMA)
return NULL;
/* Only return IOMMUv2 domains */
@@ -3012,7 +3030,7 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
if (!amd_iommu_v2_supported())
return;
- dev_data = get_dev_data(&pdev->dev);
+ dev_data = dev_iommu_priv_get(&pdev->dev);
dev_data->errata |= (1 << erratum);
}
EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
@@ -3031,11 +3049,8 @@ int amd_iommu_device_info(struct pci_dev *pdev,
memset(info, 0, sizeof(*info));
- if (!pci_ats_disabled()) {
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
- }
+ if (pci_ats_supported(pdev))
+ info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
if (pos)
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
index 12d540d9b59b..f892992c8744 100644
--- a/drivers/iommu/amd_iommu.h
+++ b/drivers/iommu/amd_iommu.h
@@ -1,9 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
#ifndef AMD_IOMMU_H
#define AMD_IOMMU_H
-int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
+#include <linux/iommu.h>
+
+#include "amd_iommu_types.h"
+
+extern int amd_iommu_get_num_iommus(void);
+extern int amd_iommu_init_dma_ops(void);
+extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
+extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_apply_erratum_63(u16 devid);
+extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
+extern int amd_iommu_init_devices(void);
+extern void amd_iommu_uninit_devices(void);
+extern void amd_iommu_init_notifier(void);
+extern int amd_iommu_init_api(void);
+
+#ifdef CONFIG_AMD_IOMMU_DEBUGFS
+void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
+#else
+static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
+#endif
+
+/* Needed for interrupt remapping */
+extern int amd_iommu_prepare(void);
+extern int amd_iommu_enable(void);
+extern void amd_iommu_disable(void);
+extern int amd_iommu_reenable(int);
+extern int amd_iommu_enable_faulting(void);
+extern int amd_iommu_guest_ir;
+
+/* IOMMUv2 specific functions */
+struct iommu_domain;
+
+extern bool amd_iommu_v2_supported(void);
+extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+ u64 address);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+ unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
+
+#ifdef CONFIG_IRQ_REMAP
+extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+#else
+static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ return 0;
+}
+#endif
+
+#define PPR_SUCCESS 0x0
+#define PPR_INVALID 0x1
+#define PPR_FAILURE 0xf
+
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+ int status, int tag);
+
+static inline bool is_rd890_iommu(struct pci_dev *pdev)
+{
+ return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
+ (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
+}
+
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+ if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+ return false;
+
+ return !!(iommu->features & f);
+}
+
+static inline u64 iommu_virt_to_phys(void *vaddr)
+{
+ return (u64)__sme_set(virt_to_phys(vaddr));
+}
+
+static inline void *iommu_phys_to_virt(unsigned long paddr)
+{
+ return phys_to_virt(__sme_clr(paddr));
+}
+
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
+extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev);
+extern int __init add_special_device(u8 type, u8 id, u16 *devid,
+ bool cmd_line);
#ifdef CONFIG_DMI
void amd_iommu_apply_ivrs_quirks(void);
diff --git a/drivers/iommu/amd_iommu_debugfs.c b/drivers/iommu/amd_iommu_debugfs.c
index c6a5c737ef09..545372fcc72f 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -8,10 +8,9 @@
*/
#include <linux/debugfs.h>
-#include <linux/iommu.h>
#include <linux/pci.h>
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
+
+#include "amd_iommu.h"
static struct dentry *amd_iommu_debugfs;
static DEFINE_MUTEX(amd_iommu_debugfs_lock);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 6be3853a5d97..3faff7f80fd2 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -18,7 +18,6 @@
#include <linux/msi.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
-#include <linux/iommu.h>
#include <linux/kmemleak.h>
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
@@ -32,9 +31,8 @@
#include <asm/irq_remapping.h>
#include <linux/crash_dump.h>
+
#include "amd_iommu.h"
-#include "amd_iommu_proto.h"
-#include "amd_iommu_types.h"
#include "irq_remapping.h"
/*
@@ -1329,8 +1327,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
}
case IVHD_DEV_ACPI_HID: {
u16 devid;
- u8 hid[ACPIHID_HID_LEN] = {0};
- u8 uid[ACPIHID_UID_LEN] = {0};
+ u8 hid[ACPIHID_HID_LEN];
+ u8 uid[ACPIHID_UID_LEN];
int ret;
if (h->type != 0x40) {
@@ -1347,6 +1345,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
}
+ uid[0] = '\0';
switch (e->uidf) {
case UID_NOT_PRESENT:
@@ -1361,8 +1360,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case UID_IS_CHARACTER:
- memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1);
- uid[ACPIHID_UID_LEN - 1] = '\0';
+ memcpy(uid, &e->uid, e->uidl);
+ uid[e->uidl] = '\0';
break;
default:
@@ -2936,7 +2935,7 @@ static int __init parse_amd_iommu_intr(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "legacy", 6) == 0) {
- amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
+ amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
break;
}
if (strncmp(str, "vapic", 5) == 0) {
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
deleted file mode 100644
index 92c2ba6468a0..000000000000
--- a/drivers/iommu/amd_iommu_proto.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#ifndef _ASM_X86_AMD_IOMMU_PROTO_H
-#define _ASM_X86_AMD_IOMMU_PROTO_H
-
-#include "amd_iommu_types.h"
-
-extern int amd_iommu_get_num_iommus(void);
-extern int amd_iommu_init_dma_ops(void);
-extern int amd_iommu_init_passthrough(void);
-extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
-extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_apply_erratum_63(u16 devid);
-extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
-extern int amd_iommu_init_devices(void);
-extern void amd_iommu_uninit_devices(void);
-extern void amd_iommu_init_notifier(void);
-extern int amd_iommu_init_api(void);
-
-#ifdef CONFIG_AMD_IOMMU_DEBUGFS
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
-#else
-static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
-#endif
-
-/* Needed for interrupt remapping */
-extern int amd_iommu_prepare(void);
-extern int amd_iommu_enable(void);
-extern void amd_iommu_disable(void);
-extern int amd_iommu_reenable(int);
-extern int amd_iommu_enable_faulting(void);
-extern int amd_iommu_guest_ir;
-
-/* IOMMUv2 specific functions */
-struct iommu_domain;
-
-extern bool amd_iommu_v2_supported(void);
-extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
- u64 address);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
- unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
-extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
-
-#ifdef CONFIG_IRQ_REMAP
-extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
-#else
-static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
-{
- return 0;
-}
-#endif
-
-#define PPR_SUCCESS 0x0
-#define PPR_INVALID 0x1
-#define PPR_FAILURE 0xf
-
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
- int status, int tag);
-
-static inline bool is_rd890_iommu(struct pci_dev *pdev)
-{
- return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
- (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
-}
-
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
-{
- if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
- return false;
-
- return !!(iommu->features & f);
-}
-
-static inline u64 iommu_virt_to_phys(void *vaddr)
-{
- return (u64)__sme_set(virt_to_phys(vaddr));
-}
-
-static inline void *iommu_phys_to_virt(unsigned long paddr)
-{
- return phys_to_virt(__sme_clr(paddr));
-}
-
-extern bool translation_pre_enabled(struct amd_iommu *iommu);
-extern struct iommu_dev_data *get_dev_data(struct device *dev);
-#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index d0d7b6a0c3d8..30a5d412255a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -395,10 +395,10 @@
#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
extern bool amd_iommu_dump;
-#define DUMP_printk(format, arg...) \
- do { \
- if (amd_iommu_dump) \
- printk(KERN_INFO "AMD-Vi: " format, ## arg); \
+#define DUMP_printk(format, arg...) \
+ do { \
+ if (amd_iommu_dump) \
+ pr_info("AMD-Vi: " format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@@ -468,8 +468,7 @@ struct protection_domain {
iommu core code */
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- int mode; /* paging mode (0-6 levels) */
- u64 *pt_root; /* page table root pointer */
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -477,6 +476,12 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
+/* For decocded pt_root */
+struct domain_pgtable {
+ int mode;
+ u64 *root;
+};
+
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index d6d85debd01b..c8a7b6b39222 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -13,13 +13,11 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
-#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>
-#include "amd_iommu_types.h"
-#include "amd_iommu_proto.h"
+#include "amd_iommu.h"
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");
@@ -517,13 +515,12 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
struct amd_iommu_fault *iommu_fault;
struct pasid_state *pasid_state;
struct device_state *dev_state;
+ struct pci_dev *pdev = NULL;
unsigned long flags;
struct fault *fault;
bool finish;
u16 tag, devid;
int ret;
- struct iommu_dev_data *dev_data;
- struct pci_dev *pdev = NULL;
iommu_fault = data;
tag = iommu_fault->tag & 0x1ff;
@@ -534,12 +531,11 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
devid & 0xff);
if (!pdev)
return -ENODEV;
- dev_data = get_dev_data(&pdev->dev);
- /* In kdump kernel pci dev is not initialized yet -> send INVALID */
ret = NOTIFY_DONE;
- if (translation_pre_enabled(amd_iommu_rlookup_table[devid])
- && dev_data->defer_attach) {
+
+ /* In kdump kernel pci dev is not initialized yet -> send INVALID */
+ if (amd_iommu_is_attach_deferred(NULL, &pdev->dev)) {
amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
PPR_INVALID, tag);
goto out;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8a908c50c306..f578677a5c41 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2663,26 +2663,20 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
}
}
-#ifdef CONFIG_PCI_ATS
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
- struct pci_dev *pdev;
+ struct device *dev = master->dev;
struct arm_smmu_device *smmu = master->smmu;
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
- !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
+ if (!(smmu->features & ARM_SMMU_FEAT_ATS))
return false;
- pdev = to_pci_dev(master->dev);
- return !pdev->untrusted && pdev->ats_cap;
-}
-#else
-static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
-{
- return false;
+ if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
+ return false;
+
+ return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
-#endif
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index f77dae7ba7d4..60a2970c37ff 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -963,6 +963,7 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
warn_invalid_dmar(phys_addr, " returns all ones");
goto unmap;
}
+ iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -1156,12 +1157,11 @@ static inline void reclaim_free_desc(struct q_inval *qi)
}
}
-static int qi_check_fault(struct intel_iommu *iommu, int index)
+static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
struct q_inval *qi = iommu->qi;
- int wait_index = (index + 1) % QI_LENGTH;
int shift = qi_shift(iommu);
if (qi->desc_status[wait_index] == QI_ABORT)
@@ -1224,17 +1224,21 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
}
/*
- * Submit the queued invalidation descriptor to the remapping
- * hardware unit and wait for its completion.
+ * Function to submit invalidation descriptors of all types to the queued
+ * invalidation interface(QI). Multiple descriptors can be submitted at a
+ * time, a wait descriptor will be appended to each submission to ensure
+ * hardware has completed the invalidation before return. Wait descriptors
+ * can be part of the submission but it will not be polled for completion.
*/
-int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options)
{
- int rc;
struct q_inval *qi = iommu->qi;
- int offset, shift, length;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
+ int offset, shift;
+ int rc, i;
if (!qi)
return 0;
@@ -1243,32 +1247,41 @@ restart:
rc = 0;
raw_spin_lock_irqsave(&qi->q_lock, flags);
- while (qi->free_cnt < 3) {
+ /*
+ * Check if we have enough empty slots in the queue to submit,
+ * the calculation is based on:
+ * # of desc + 1 wait desc + 1 space between head and tail
+ */
+ while (qi->free_cnt < count + 2) {
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
cpu_relax();
raw_spin_lock_irqsave(&qi->q_lock, flags);
}
index = qi->free_head;
- wait_index = (index + 1) % QI_LENGTH;
+ wait_index = (index + count) % QI_LENGTH;
shift = qi_shift(iommu);
- length = 1 << shift;
- qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+ for (i = 0; i < count; i++) {
+ offset = ((index + i) % QI_LENGTH) << shift;
+ memcpy(qi->desc + offset, &desc[i], 1 << shift);
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+ }
+ qi->desc_status[wait_index] = QI_IN_USE;
- offset = index << shift;
- memcpy(qi->desc + offset, desc, length);
wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+ if (options & QI_OPT_WAIT_DRAIN)
+ wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
wait_desc.qw2 = 0;
wait_desc.qw3 = 0;
offset = wait_index << shift;
- memcpy(qi->desc + offset, &wait_desc, length);
+ memcpy(qi->desc + offset, &wait_desc, 1 << shift);
- qi->free_head = (qi->free_head + 2) % QI_LENGTH;
- qi->free_cnt -= 2;
+ qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
+ qi->free_cnt -= count + 1;
/*
* update the HW tail register indicating the presence of
@@ -1284,7 +1297,7 @@ restart:
* a deadlock where the interrupt context can wait indefinitely
* for free slots in the queue.
*/
- rc = qi_check_fault(iommu, index);
+ rc = qi_check_fault(iommu, index, wait_index);
if (rc)
break;
@@ -1293,7 +1306,8 @@ restart:
raw_spin_lock(&qi->q_lock);
}
- qi->desc_status[index] = QI_DONE;
+ for (i = 0; i < count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1317,7 +1331,7 @@ void qi_global_iec(struct intel_iommu *iommu)
desc.qw3 = 0;
/* should never fail */
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
@@ -1331,7 +1345,7 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
@@ -1355,7 +1369,7 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
@@ -1377,7 +1391,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
/* PASID-based IOTLB invalidation */
@@ -1418,7 +1432,46 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
QI_EIOTLB_AM(mask);
}
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+/* PASID-based device IOTLB Invalidate */
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order, u64 granu)
+{
+ unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
+ struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+
+ desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(pfsid);
+ desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
+
+ /*
+ * If S bit is 0, we only flush a single page. If S bit is set,
+ * The least significant zero bit indicates the invalidation address
+ * range. VT-d spec 6.5.2.6.
+ * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
+ * size order = 0 is PAGE_SIZE 4KB
+ * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
+ * ECAP.
+ */
+ desc.qw1 |= addr & ~mask;
+ if (size_order)
+ desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
+ u64 granu, int pasid)
+{
+ struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
+
+ desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
+ QI_PC_GRAN(granu) | QI_PC_TYPE;
+ qi_submit_sync(iommu, &desc, 1, 0);
}
/*
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index a386b83e0e34..3c0c67a99c7b 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -131,7 +131,7 @@ static int hyperv_irq_remapping_activate(struct irq_domain *domain,
return 0;
}
-static struct irq_domain_ops hyperv_ir_domain_ops = {
+static const struct irq_domain_ops hyperv_ir_domain_ops = {
.alloc = hyperv_irq_remapping_alloc,
.free = hyperv_irq_remapping_free,
.activate = hyperv_irq_remapping_activate,
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index 3eb1fe240fb0..cf1ebb98e418 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -372,6 +372,66 @@ static int domain_translation_struct_show(struct seq_file *m, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+static void invalidation_queue_entry_show(struct seq_file *m,
+ struct intel_iommu *iommu)
+{
+ int index, shift = qi_shift(iommu);
+ struct qi_desc *desc;
+ int offset;
+
+ if (ecap_smts(iommu->ecap))
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tqw2\t\t\tqw3\t\t\tstatus\n");
+ else
+ seq_puts(m, "Index\t\tqw0\t\t\tqw1\t\t\tstatus\n");
+
+ for (index = 0; index < QI_LENGTH; index++) {
+ offset = index << shift;
+ desc = iommu->qi->desc + offset;
+ if (ecap_smts(iommu->ecap))
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ desc->qw2, desc->qw3,
+ iommu->qi->desc_status[index]);
+ else
+ seq_printf(m, "%5d\t%016llx\t%016llx\t%016x\n",
+ index, desc->qw0, desc->qw1,
+ iommu->qi->desc_status[index]);
+ }
+}
+
+static int invalidation_queue_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ struct q_inval *qi;
+ int shift;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ qi = iommu->qi;
+ shift = qi_shift(iommu);
+
+ if (!qi || !ecap_qis(iommu->ecap))
+ continue;
+
+ seq_printf(m, "Invalidation queue on IOMMU: %s\n", iommu->name);
+
+ raw_spin_lock_irqsave(&qi->q_lock, flags);
+ seq_printf(m, " Base: 0x%llx\tHead: %lld\tTail: %lld\n",
+ (u64)virt_to_phys(qi->desc),
+ dmar_readq(iommu->reg + DMAR_IQH_REG) >> shift,
+ dmar_readq(iommu->reg + DMAR_IQT_REG) >> shift);
+ invalidation_queue_entry_show(m, iommu);
+ raw_spin_unlock_irqrestore(&qi->q_lock, flags);
+ seq_putc(m, '\n');
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(invalidation_queue);
+
#ifdef CONFIG_IRQ_REMAP
static void ir_tbl_remap_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -490,6 +550,8 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("domain_translation_struct", 0444,
intel_iommu_debug, NULL,
&domain_translation_struct_fops);
+ debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
+ NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b906727f5b85..648a785e078a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -296,31 +296,6 @@ static inline void context_clear_entry(struct context_entry *context)
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY BIT(0)
-
-/*
- * This is a DMA domain allocated through the iommu domain allocation
- * interface. But one or more devices belonging to this domain have
- * been chosen to use a private domain. We should avoid to use the
- * map/unmap/iova_to_phys APIs on it.
- */
-#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
-
-/*
- * When VT-d works in the scalable mode, it allows DMA translation to
- * happen through either first level or second level page table. This
- * bit marks that the DMA translation for the domain goes through the
- * first level page table, otherwise, it goes through the second level.
- */
-#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
-
-/*
- * Domain represents a virtual machine which demands iommu nested
- * translation mode support.
- */
-#define DOMAIN_FLAG_NESTING_MODE BIT(3)
-
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -355,11 +330,6 @@ static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
- struct device *dev);
-static int domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu);
-static bool device_is_rmrr_locked(struct device *dev);
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -371,11 +341,11 @@ int dmar_disabled = 0;
int dmar_disabled = 1;
#endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
-#ifdef INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+#ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
int intel_iommu_sm = 1;
#else
int intel_iommu_sm;
-#endif /* INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
+#endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
@@ -395,6 +365,21 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
#define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
+struct device_domain_info *get_domain_info(struct device *dev)
+{
+ struct device_domain_info *info;
+
+ if (!dev)
+ return NULL;
+
+ info = dev->archdata.iommu;
+ if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
+ info == DEFER_DEVICE_DOMAIN_INFO))
+ return NULL;
+
+ return info;
+}
+
DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
@@ -446,12 +431,6 @@ static void init_translation_status(struct intel_iommu *iommu)
iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}
-/* Convert generic 'struct iommu_domain to private struct dmar_domain */
-static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
-{
- return container_of(dom, struct dmar_domain, domain);
-}
-
static int __init intel_iommu_setup(char *str)
{
if (!str)
@@ -480,8 +459,7 @@ static int __init intel_iommu_setup(char *str)
pr_info("Intel-IOMMU: scalable mode supported\n");
intel_iommu_sm = 1;
} else if (!strncmp(str, "tboot_noforce", 13)) {
- printk(KERN_INFO
- "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
+ pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
intel_iommu_tboot_noforce = 1;
} else if (!strncmp(str, "nobounce", 8)) {
pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
@@ -1454,8 +1432,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
info->pri_enabled = 1;
#endif
- if (!pdev->untrusted && info->ats_supported &&
- pci_ats_page_aligned(pdev) &&
+ if (info->ats_supported && pci_ats_page_aligned(pdev) &&
!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
info->ats_enabled = 1;
domain_update_iotlb(info->domain);
@@ -1763,6 +1740,9 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
if (ecap_prs(iommu->ecap))
intel_svm_finish_prq(iommu);
}
+ if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
+ ioasid_unregister_allocator(&iommu->pasid_allocator);
+
#endif
}
@@ -1911,11 +1891,6 @@ static int dmar_init_reserved_ranges(void)
return 0;
}
-static void domain_reserve_special_ranges(struct dmar_domain *domain)
-{
- copy_reserved_iova(&reserved_iova_list, &domain->iovad);
-}
-
static inline int guestwidth_to_adjustwidth(int gaw)
{
int agaw;
@@ -1930,65 +1905,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
return agaw;
}
-static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
- int guest_width)
-{
- int adjust_width, agaw;
- unsigned long sagaw;
- int ret;
-
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-
- if (!intel_iommu_strict) {
- ret = init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova, iova_entry_free);
- if (ret)
- pr_info("iova flush queue initialization failed\n");
- }
-
- domain_reserve_special_ranges(domain);
-
- /* calculate AGAW */
- if (guest_width > cap_mgaw(iommu->cap))
- guest_width = cap_mgaw(iommu->cap);
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- sagaw = cap_sagaw(iommu->cap);
- if (!test_bit(agaw, &sagaw)) {
- /* hardware doesn't support it, choose a bigger one */
- pr_debug("Hardware doesn't support agaw %d\n", agaw);
- agaw = find_next_bit(&sagaw, 5, agaw);
- if (agaw >= 5)
- return -ENODEV;
- }
- domain->agaw = agaw;
-
- if (ecap_coherent(iommu->ecap))
- domain->iommu_coherency = 1;
- else
- domain->iommu_coherency = 0;
-
- if (ecap_sc_support(iommu->ecap))
- domain->iommu_snooping = 1;
- else
- domain->iommu_snooping = 0;
-
- if (intel_iommu_superpage)
- domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
- else
- domain->iommu_superpage = 0;
-
- domain->nid = iommu->node;
-
- /* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
- if (!domain->pgd)
- return -ENOMEM;
- __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static void domain_exit(struct dmar_domain *domain)
{
@@ -1996,7 +1912,8 @@ static void domain_exit(struct dmar_domain *domain)
domain_remove_dev_info(domain);
/* destroy iovas */
- put_iova_domain(&domain->iovad);
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ put_iova_domain(&domain->iovad);
if (domain->pgd) {
struct page *freelist;
@@ -2518,11 +2435,8 @@ struct dmar_domain *find_domain(struct device *dev)
if (unlikely(attach_deferred(dev) || iommu_dummy(dev)))
return NULL;
- if (dev_is_pci(dev))
- dev = &pci_real_dma_dev(to_pci_dev(dev))->dev;
-
/* No lock here, assumes no domain exit in normal case */
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (likely(info))
return info->domain;
@@ -2545,7 +2459,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
struct device_domain_info *info;
list_for_each_entry(info, &device_domain_list, global)
- if (info->iommu->segment == segment && info->bus == bus &&
+ if (info->segment == segment && info->bus == bus &&
info->devfn == devfn)
return info;
@@ -2582,6 +2496,12 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags);
}
+static bool dev_is_real_dma_subdevice(struct device *dev)
+{
+ return dev && dev_is_pci(dev) &&
+ pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2596,8 +2516,18 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (!info)
return NULL;
- info->bus = bus;
- info->devfn = devfn;
+ if (!dev_is_real_dma_subdevice(dev)) {
+ info->bus = bus;
+ info->devfn = devfn;
+ info->segment = iommu->segment;
+ } else {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ info->bus = pdev->bus->number;
+ info->devfn = pdev->devfn;
+ info->segment = pci_domain_nr(pdev->bus);
+ }
+
info->ats_supported = info->pasid_supported = info->pri_supported = 0;
info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
info->ats_qdep = 0;
@@ -2611,10 +2541,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (dev && dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(info->dev);
- if (!pdev->untrusted &&
- !pci_ats_disabled() &&
- ecap_dev_iotlb_support(iommu->ecap) &&
- pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
+ if (ecap_dev_iotlb_support(iommu->ecap) &&
+ pci_ats_supported(pdev) &&
dmar_find_matched_atsr_unit(pdev))
info->ats_supported = 1;
@@ -2637,7 +2565,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (!found) {
struct device_domain_info *info2;
- info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
+ info->devfn);
if (info2) {
found = info2->domain;
info2->dev = dev;
@@ -2704,108 +2633,10 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
return domain;
}
-static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- *(u16 *)opaque = alias;
- return 0;
-}
-
-static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
-{
- struct device_domain_info *info;
- struct dmar_domain *domain = NULL;
- struct intel_iommu *iommu;
- u16 dma_alias;
- unsigned long flags;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
- PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff);
- if (info) {
- iommu = info->iommu;
- domain = info->domain;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
- /* DMA alias already has a domain, use it */
- if (info)
- goto out;
- }
-
- /* Allocate and initialize new domain for the device */
- domain = alloc_domain(0);
- if (!domain)
- return NULL;
- if (domain_init(domain, iommu, gaw)) {
- domain_exit(domain);
- return NULL;
- }
-
-out:
- return domain;
-}
-
-static struct dmar_domain *set_domain_for_dev(struct device *dev,
- struct dmar_domain *domain)
-{
- struct intel_iommu *iommu;
- struct dmar_domain *tmp;
- u16 req_id, dma_alias;
- u8 bus, devfn;
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if (!iommu)
- return NULL;
-
- req_id = ((u16)bus << 8) | devfn;
-
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
-
- /* register PCI DMA alias device */
- if (req_id != dma_alias) {
- tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
- dma_alias & 0xff, NULL, domain);
-
- if (!tmp || tmp != domain)
- return tmp;
- }
- }
-
- tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
- if (!tmp || tmp != domain)
- return tmp;
-
- return domain;
-}
-
static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
+ unsigned long first_vpfn,
+ unsigned long last_vpfn)
{
- unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
- unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
-
- if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
- dma_to_mm_pfn(last_vpfn))) {
- pr_err("Reserving iova failed\n");
- return -ENOMEM;
- }
-
- pr_debug("Mapping reserved region %llx-%llx\n", start, end);
/*
* RMRR range might have overlap with physical memory range,
* clear it first
@@ -2817,45 +2648,6 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
DMA_PTE_READ|DMA_PTE_WRITE);
}
-static int domain_prepare_identity_map(struct device *dev,
- struct dmar_domain *domain,
- unsigned long long start,
- unsigned long long end)
-{
- /* For _hardware_ passthrough, don't bother. But for software
- passthrough, we do it anyway -- it may indicate a memory
- range which is reserved in E820, so which didn't get set
- up to start with in si_domain */
- if (domain == si_domain && hw_pass_through) {
- dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
- start, end);
- return 0;
- }
-
- dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
-
- if (end < start) {
- WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- if (end >> agaw_to_width(domain->agaw)) {
- WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- agaw_to_width(domain->agaw),
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
- return -EIO;
- }
-
- return iommu_domain_identity_map(domain, start, end);
-}
-
static int md_domain_init(struct dmar_domain *domain, int guest_width);
static int __init si_domain_init(int hw)
@@ -2882,7 +2674,8 @@ static int __init si_domain_init(int hw)
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
ret = iommu_domain_identity_map(si_domain,
- PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+ mm_to_dma_pfn(start_pfn),
+ mm_to_dma_pfn(end_pfn));
if (ret)
return ret;
}
@@ -2911,17 +2704,6 @@ static int __init si_domain_init(int hw)
return 0;
}
-static int identity_mapping(struct device *dev)
-{
- struct device_domain_info *info;
-
- info = dev->archdata.iommu;
- if (info)
- return (info->domain == si_domain);
-
- return 0;
-}
-
static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct dmar_domain *ndomain;
@@ -3048,31 +2830,6 @@ static int device_def_domain_type(struct device *dev)
if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
-
- /*
- * We want to start off with all devices in the 1:1 domain, and
- * take them out later if we find they can't access all of memory.
- *
- * However, we can't do this for PCI devices behind bridges,
- * because all PCI devices behind the same bridge will end up
- * with the same source-id on their transactions.
- *
- * Practically speaking, we can't change things around for these
- * devices at run-time, because we can't be sure there'll be no
- * DMA transactions in flight for any of their siblings.
- *
- * So PCI devices (unless they're on the root bus) as well as
- * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
- * the 1:1 domain, just in _case_ one of their siblings turns out
- * not to be able to map all of memory.
- */
- if (!pci_is_pcie(pdev)) {
- if (!pci_is_root_bus(pdev->bus))
- return IOMMU_DOMAIN_DMA;
- if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
- return IOMMU_DOMAIN_DMA;
- } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
- return IOMMU_DOMAIN_DMA;
}
return 0;
@@ -3297,6 +3054,85 @@ out_unmap:
return ret;
}
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
+{
+ struct intel_iommu *iommu = data;
+ ioasid_t ioasid;
+
+ if (!iommu)
+ return INVALID_IOASID;
+ /*
+ * VT-d virtual command interface always uses the full 20 bit
+ * PASID range. Host can partition guest PASID range based on
+ * policies but it is out of guest's control.
+ */
+ if (min < PASID_MIN || max > intel_pasid_max_id)
+ return INVALID_IOASID;
+
+ if (vcmd_alloc_pasid(iommu, &ioasid))
+ return INVALID_IOASID;
+
+ return ioasid;
+}
+
+static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
+{
+ struct intel_iommu *iommu = data;
+
+ if (!iommu)
+ return;
+ /*
+ * Sanity check the ioasid owner is done at upper layer, e.g. VFIO
+ * We can only free the PASID when all the devices are unbound.
+ */
+ if (ioasid_find(NULL, ioasid, NULL)) {
+ pr_alert("Cannot free active IOASID %d\n", ioasid);
+ return;
+ }
+ vcmd_free_pasid(iommu, ioasid);
+}
+
+static void register_pasid_allocator(struct intel_iommu *iommu)
+{
+ /*
+ * If we are running in the host, no need for custom allocator
+ * in that PASIDs are allocated from the host system-wide.
+ */
+ if (!cap_caching_mode(iommu->cap))
+ return;
+
+ if (!sm_supported(iommu)) {
+ pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
+ return;
+ }
+
+ /*
+ * Register a custom PASID allocator if we are running in a guest,
+ * guest PASID must be obtained via virtual command interface.
+ * There can be multiple vIOMMUs in each guest but only one allocator
+ * is active. All vIOMMU allocators will eventually be calling the same
+ * host allocator.
+ */
+ if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap))
+ return;
+
+ pr_info("Register custom PASID allocator\n");
+ iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
+ iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
+ iommu->pasid_allocator.pdata = (void *)iommu;
+ if (ioasid_register_allocator(&iommu->pasid_allocator)) {
+ pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
+ /*
+ * Disable scalable mode on this IOMMU if there
+ * is no custom allocator. Mixing SM capable vIOMMU
+ * and non-SM vIOMMU are not supported.
+ */
+ intel_iommu_sm = 0;
+ }
+}
+#endif
+
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
@@ -3414,6 +3250,9 @@ static int __init init_dmars(void)
*/
for_each_active_iommu(iommu, drhd) {
iommu_flush_write_buffer(iommu);
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ register_pasid_allocator(iommu);
+#endif
iommu_set_root_entry(iommu);
iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
@@ -3531,100 +3370,6 @@ static unsigned long intel_alloc_iova(struct device *dev,
return iova_pfn;
}
-static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
-{
- struct dmar_domain *domain, *tmp;
- struct dmar_rmrr_unit *rmrr;
- struct device *i_dev;
- int i, ret;
-
- /* Device shouldn't be attached by any domains. */
- domain = find_domain(dev);
- if (domain)
- return NULL;
-
- domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
- if (!domain)
- goto out;
-
- /* We have a new domain - setup possible RMRRs for the device */
- rcu_read_lock();
- for_each_rmrr_units(rmrr) {
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, i_dev) {
- if (i_dev != dev)
- continue;
-
- ret = domain_prepare_identity_map(dev, domain,
- rmrr->base_address,
- rmrr->end_address);
- if (ret)
- dev_err(dev, "Mapping reserved region failed\n");
- }
- }
- rcu_read_unlock();
-
- tmp = set_domain_for_dev(dev, domain);
- if (!tmp || domain != tmp) {
- domain_exit(domain);
- domain = tmp;
- }
-
-out:
- if (!domain)
- dev_err(dev, "Allocating domain failed\n");
- else
- domain->domain.type = IOMMU_DOMAIN_DMA;
-
- return domain;
-}
-
-/* Check if the dev needs to go through non-identity map and unmap process.*/
-static bool iommu_need_mapping(struct device *dev)
-{
- int ret;
-
- if (iommu_dummy(dev))
- return false;
-
- if (unlikely(attach_deferred(dev)))
- do_deferred_attach(dev);
-
- ret = identity_mapping(dev);
- if (ret) {
- u64 dma_mask = *dev->dma_mask;
-
- if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
- dma_mask = dev->coherent_dma_mask;
-
- if (dma_mask >= dma_direct_get_required_mask(dev))
- return false;
-
- /*
- * 32 bit DMA is removed from si_domain and fall back to
- * non-identity mapping.
- */
- dmar_remove_one_dev_info(dev);
- ret = iommu_request_dma_domain_for_dev(dev);
- if (ret) {
- struct iommu_domain *domain;
- struct dmar_domain *dmar_domain;
-
- domain = iommu_get_domain_for_dev(dev);
- if (domain) {
- dmar_domain = to_dmar_domain(domain);
- dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
- }
- dmar_remove_one_dev_info(dev);
- get_private_domain_for_dev(dev);
- }
-
- dev_info(dev, "32bit DMA uses non-identity mapping\n");
- }
-
- return true;
-}
-
static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int dir, u64 dma_mask)
{
@@ -3638,6 +3383,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
BUG_ON(dir == DMA_NONE);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
+
domain = find_domain(dev);
if (!domain)
return DMA_MAPPING_ERROR;
@@ -3689,20 +3437,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, page_to_phys(page) + offset,
- size, dir, *dev->dma_mask);
- return dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ return __intel_map_single(dev, page_to_phys(page) + offset,
+ size, dir, *dev->dma_mask);
}
static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- return __intel_map_single(dev, phys_addr, size, dir,
- *dev->dma_mask);
- return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+ return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
}
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3753,17 +3496,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
- else
- dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
+ intel_unmap(dev, dev_addr, size);
}
static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- if (iommu_need_mapping(dev))
- intel_unmap(dev, dev_addr, size);
+ intel_unmap(dev, dev_addr, size);
}
static void *intel_alloc_coherent(struct device *dev, size_t size,
@@ -3773,8 +3512,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
struct page *page = NULL;
int order;
- if (!iommu_need_mapping(dev))
- return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3809,9 +3548,6 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
int order;
struct page *page = virt_to_page(vaddr);
- if (!iommu_need_mapping(dev))
- return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
-
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -3829,9 +3565,6 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
struct scatterlist *sg;
int i;
- if (!iommu_need_mapping(dev))
- return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
-
for_each_sg(sglist, sg, nelems, i) {
nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
}
@@ -3855,8 +3588,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
struct intel_iommu *iommu;
BUG_ON(dir == DMA_NONE);
- if (!iommu_need_mapping(dev))
- return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
+
+ if (unlikely(attach_deferred(dev)))
+ do_deferred_attach(dev);
domain = find_domain(dev);
if (!domain)
@@ -3903,8 +3637,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
static u64 intel_get_required_mask(struct device *dev)
{
- if (!iommu_need_mapping(dev))
- return dma_direct_get_required_mask(dev);
return DMA_BIT_MASK(32);
}
@@ -4813,58 +4545,37 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
unsigned long val, void *v)
{
struct memory_notify *mhp = v;
- unsigned long long start, end;
- unsigned long start_vpfn, last_vpfn;
+ unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
+ unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+ mhp->nr_pages - 1);
switch (val) {
case MEM_GOING_ONLINE:
- start = mhp->start_pfn << PAGE_SHIFT;
- end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
- if (iommu_domain_identity_map(si_domain, start, end)) {
- pr_warn("Failed to build identity map for [%llx-%llx]\n",
- start, end);
+ if (iommu_domain_identity_map(si_domain,
+ start_vpfn, last_vpfn)) {
+ pr_warn("Failed to build identity map for [%lx-%lx]\n",
+ start_vpfn, last_vpfn);
return NOTIFY_BAD;
}
break;
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
- start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
- last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
- while (start_vpfn <= last_vpfn) {
- struct iova *iova;
+ {
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
struct page *freelist;
- iova = find_iova(&si_domain->iovad, start_vpfn);
- if (iova == NULL) {
- pr_debug("Failed get IOVA for PFN %lx\n",
- start_vpfn);
- break;
- }
-
- iova = split_and_remove_iova(&si_domain->iovad, iova,
- start_vpfn, last_vpfn);
- if (iova == NULL) {
- pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
-
- freelist = domain_unmap(si_domain, iova->pfn_lo,
- iova->pfn_hi);
+ freelist = domain_unmap(si_domain,
+ start_vpfn, last_vpfn);
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
iommu_flush_iotlb_psi(iommu, si_domain,
- iova->pfn_lo, iova_size(iova),
+ start_vpfn, mhp->nr_pages,
!freelist, 0);
rcu_read_unlock();
dma_free_pagelist(freelist);
-
- start_vpfn = iova->pfn_hi + 1;
- free_iova_mem(iova);
}
break;
}
@@ -4892,8 +4603,9 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
for (did = 0; did < cap_ndoms(iommu->cap); did++) {
domain = get_iommu_domain(iommu, (u16)did);
- if (!domain)
+ if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
continue;
+
free_cpu_cached_iovas(cpu, &domain->iovad);
}
}
@@ -5186,18 +4898,6 @@ int __init intel_iommu_init(void)
}
up_write(&dmar_global_lock);
-#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
- /*
- * If the system has no untrusted device or the user has decided
- * to disable the bounce page mechanisms, we don't need swiotlb.
- * Mark this and the pre-allocated bounce pages will be released
- * later.
- */
- if (!has_untrusted_dev() || intel_no_bounce)
- swiotlb = 0;
-#endif
- dma_ops = &intel_dma_ops;
-
init_iommu_pm_ops();
down_read(&dmar_global_lock);
@@ -5283,10 +4983,11 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
if (info->dev) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
- PASID_RID2PASID);
+ PASID_RID2PASID, false);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ if (!dev_is_real_dma_subdevice(info->dev))
+ domain_context_clear(iommu, info->dev);
intel_pasid_free_table(info->dev);
}
@@ -5296,12 +4997,6 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
domain_detach_iommu(domain, iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
- /* free the private domain */
- if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
- !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
- list_empty(&domain->devices))
- domain_exit(info->domain);
-
free_devinfo_mem(info);
}
@@ -5311,9 +5006,8 @@ static void dmar_remove_one_dev_info(struct device *dev)
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
- if (info && info != DEFER_DEVICE_DOMAIN_INFO
- && info != DUMMY_DEVICE_DOMAIN_INFO)
+ info = get_domain_info(dev);
+ if (info)
__dmar_remove_one_dev_info(info);
spin_unlock_irqrestore(&device_domain_lock, flags);
}
@@ -5322,9 +5016,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
int adjust_width;
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
-
/* calculate AGAW */
domain->gaw = guest_width;
adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -5343,11 +5034,21 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
return 0;
}
+static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
+{
+ init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+ copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
+
+ if (!intel_iommu_strict &&
+ init_iova_flush_queue(&dmar_domain->iovad,
+ iommu_flush_iova, iova_entry_free))
+ pr_info("iova flush queue initialization failed\n");
+}
+
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
- int ret;
switch (type) {
case IOMMU_DOMAIN_DMA:
@@ -5364,13 +5065,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
return NULL;
}
- if (!intel_iommu_strict && type == IOMMU_DOMAIN_DMA) {
- ret = init_iova_flush_queue(&dmar_domain->iovad,
- iommu_flush_iova,
- iova_entry_free);
- if (ret)
- pr_info("iova flush queue initialization failed\n");
- }
+ if (type == IOMMU_DOMAIN_DMA)
+ intel_init_iova_domain(dmar_domain);
domain_update_iommu_cap(dmar_domain);
@@ -5403,7 +5099,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
static inline bool
is_aux_domain(struct device *dev, struct iommu_domain *domain)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
return info && info->auxd_enabled &&
domain->type == IOMMU_DOMAIN_UNMANAGED;
@@ -5412,7 +5108,7 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain)
static void auxiliary_link_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5425,7 +5121,7 @@ static void auxiliary_link_device(struct dmar_domain *domain,
static void auxiliary_unlink_device(struct dmar_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
assert_spin_locked(&device_domain_lock);
if (WARN_ON(!info))
@@ -5513,13 +5209,13 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
return;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
iommu = info->iommu;
auxiliary_unlink_device(domain, dev);
spin_lock(&iommu->lock);
- intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
+ intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
domain_detach_iommu(domain, iommu);
spin_unlock(&iommu->lock);
@@ -5626,6 +5322,176 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
aux_domain_remove_dev(to_dmar_domain(domain), dev);
}
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices guest
+ * owns the first level page tables. Invalidations of translation caches in the
+ * guest are trapped and passed down to the host.
+ *
+ * vIOMMU in the guest will only expose first level page tables, therefore
+ * we do not support IOTLB granularity for request without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by iommu cache type
+ * Y: indexed by enum iommu_inv_granularity
+ * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+ /*
+ * PASID based IOTLB invalidation: PASID selective (per PASID),
+ * page selective (address granularity)
+ */
+ {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+ /* PASID based dev TLBs */
+ {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+ /* PASID cache */
+ {-EINVAL, -EINVAL, -EINVAL}
+};
+
+static inline int to_vtd_granularity(int type, int granu)
+{
+ return inv_type_granu_table[type][granu];
+}
+
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+ u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+
+ /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+ * IOMMU cache invalidate API passes granu_size in bytes, and number of
+ * granu size in contiguous memory.
+ */
+ return order_base_2(nr_pages);
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+ struct iommu_cache_invalidate_info *inv_info)
+{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct device_domain_info *info;
+ struct intel_iommu *iommu;
+ unsigned long flags;
+ int cache_type;
+ u8 bus, devfn;
+ u16 did, sid;
+ int ret = 0;
+ u64 size = 0;
+
+ if (!inv_info || !dmar_domain ||
+ inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+ return -EINVAL;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+ return -EINVAL;
+
+ spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&iommu->lock);
+ info = get_domain_info(dev);
+ if (!info) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ did = dmar_domain->iommu_did[iommu->seq_id];
+ sid = PCI_DEVID(bus, devfn);
+
+ /* Size is only valid in address selective invalidation */
+ if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
+ size = to_vtd_size(inv_info->addr_info.granule_size,
+ inv_info->addr_info.nb_granules);
+
+ for_each_set_bit(cache_type,
+ (unsigned long *)&inv_info->cache,
+ IOMMU_CACHE_INV_TYPE_NR) {
+ int granu = 0;
+ u64 pasid = 0;
+
+ granu = to_vtd_granularity(cache_type, inv_info->granularity);
+ if (granu == -EINVAL) {
+ pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+ cache_type, inv_info->granularity);
+ break;
+ }
+
+ /*
+ * PASID is stored in different locations based on the
+ * granularity.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+ (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+ pasid = inv_info->pasid_info.pasid;
+ else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+ pasid = inv_info->addr_info.pasid;
+
+ switch (BIT(cache_type)) {
+ case IOMMU_CACHE_INV_TYPE_IOTLB:
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+ size &&
+ (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+ pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+ inv_info->addr_info.addr, size);
+ ret = -ERANGE;
+ goto out_unlock;
+ }
+
+ /*
+ * If granu is PASID-selective, address is ignored.
+ * We use npages = -1 to indicate that.
+ */
+ qi_flush_piotlb(iommu, did, pasid,
+ mm_to_dma_pfn(inv_info->addr_info.addr),
+ (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
+ inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+ /*
+ * Always flush device IOTLB if ATS is enabled. vIOMMU
+ * in the guest may assume IOTLB flush is inclusive,
+ * which is more efficient.
+ */
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(iommu, sid,
+ info->pfsid, pasid,
+ info->ats_qdep,
+ inv_info->addr_info.addr,
+ size, granu);
+ break;
+ case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+ if (info->ats_enabled)
+ qi_flush_dev_iotlb_pasid(iommu, sid,
+ info->pfsid, pasid,
+ info->ats_qdep,
+ inv_info->addr_info.addr,
+ size, granu);
+ else
+ pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+ break;
+ default:
+ dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+ cache_type);
+ ret = -EINVAL;
+ }
+ }
+out_unlock:
+ spin_unlock(&iommu->lock);
+ spin_unlock_irqrestore(&device_domain_lock, flags);
+
+ return ret;
+}
+#endif
+
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
size_t size, int iommu_prot, gfp_t gfp)
@@ -5793,11 +5659,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
if (translation_pre_enabled(iommu))
dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
- if (device_needs_bounce(dev)) {
- dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
- set_dma_ops(dev, &bounce_dma_ops);
- }
-
return &iommu->iommu;
}
@@ -5812,7 +5673,19 @@ static void intel_iommu_release_device(struct device *dev)
dmar_remove_one_dev_info(dev);
+ set_dma_ops(dev, NULL);
+}
+
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(dev);
if (device_needs_bounce(dev))
+ set_dma_ops(dev, &bounce_dma_ops);
+ else if (domain && domain->type == IOMMU_DOMAIN_DMA)
+ set_dma_ops(dev, &intel_dma_ops);
+ else
set_dma_ops(dev, NULL);
}
@@ -5890,7 +5763,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
spin_lock(&iommu->lock);
ret = -EINVAL;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_supported)
goto out;
@@ -5986,7 +5859,7 @@ static int intel_iommu_enable_auxd(struct device *dev)
return -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
info->auxd_enabled = 1;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -5999,7 +5872,7 @@ static int intel_iommu_disable_auxd(struct device *dev)
unsigned long flags;
spin_lock_irqsave(&device_domain_lock, flags);
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!WARN_ON(!info))
info->auxd_enabled = 0;
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -6052,6 +5925,14 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
+ info->pasid_supported && info->pri_supported &&
+ info->ats_supported;
+ }
+
return false;
}
@@ -6061,6 +5942,16 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_SVA) {
+ struct device_domain_info *info = get_domain_info(dev);
+
+ if (!info)
+ return -EINVAL;
+
+ if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
+ return 0;
+ }
+
return -ENODEV;
}
@@ -6076,7 +5967,7 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
static bool
intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
{
- struct device_domain_info *info = dev->archdata.iommu;
+ struct device_domain_info *info = get_domain_info(dev);
if (feat == IOMMU_DEV_FEAT_AUX)
return scalable_mode_support() && info && info->auxd_enabled;
@@ -6144,6 +6035,7 @@ const struct iommu_ops intel_iommu_ops = {
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
.probe_device = intel_iommu_probe_device,
+ .probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
@@ -6156,6 +6048,14 @@ const struct iommu_ops intel_iommu_ops = {
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ .cache_invalidate = intel_iommu_sva_invalidate,
+ .sva_bind_gpasid = intel_svm_bind_gpasid,
+ .sva_unbind_gpasid = intel_svm_unbind_gpasid,
+ .sva_bind = intel_svm_bind,
+ .sva_unbind = intel_svm_unbind,
+ .sva_get_pasid = intel_svm_get_pasid,
+#endif
};
static void quirk_iommu_igfx(struct pci_dev *dev)
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 22b30f10b396..c81f0f17c6ba 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -27,6 +27,63 @@
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
+{
+ unsigned long flags;
+ u8 status_code;
+ int ret = 0;
+ u64 res;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
+ IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ status_code = VCMD_VRSP_SC(res);
+ switch (status_code) {
+ case VCMD_VRSP_SC_SUCCESS:
+ *pasid = VCMD_VRSP_RESULT_PASID(res);
+ break;
+ case VCMD_VRSP_SC_NO_PASID_AVAIL:
+ pr_info("IOMMU: %s: No PASID available\n", iommu->name);
+ ret = -ENOSPC;
+ break;
+ default:
+ ret = -ENODEV;
+ pr_warn("IOMMU: %s: Unexpected error code %d\n",
+ iommu->name, status_code);
+ }
+
+ return ret;
+}
+
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
+{
+ unsigned long flags;
+ u8 status_code;
+ u64 res;
+
+ raw_spin_lock_irqsave(&iommu->register_lock, flags);
+ dmar_writeq(iommu->reg + DMAR_VCMD_REG,
+ VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
+ IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
+ !(res & VCMD_VRSP_IP), res);
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ status_code = VCMD_VRSP_SC(res);
+ switch (status_code) {
+ case VCMD_VRSP_SC_SUCCESS:
+ break;
+ case VCMD_VRSP_SC_INVALID_PASID:
+ pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
+ break;
+ default:
+ pr_warn("IOMMU: %s: Unexpected error code %d\n",
+ iommu->name, status_code);
+ }
+}
+
/*
* Per device pasid table management:
*/
@@ -94,7 +151,7 @@ int intel_pasid_alloc_table(struct device *dev)
int size;
might_sleep();
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
@@ -141,7 +198,7 @@ void intel_pasid_free_table(struct device *dev)
struct pasid_entry *table;
int i, max_pde;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !dev_is_pci(dev) || !info->pasid_table)
return;
@@ -167,7 +224,7 @@ struct pasid_table *intel_pasid_get_table(struct device *dev)
{
struct device_domain_info *info;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info)
return NULL;
@@ -178,7 +235,7 @@ int intel_pasid_get_dev_max_id(struct device *dev)
{
struct device_domain_info *info;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->pasid_table)
return 0;
@@ -199,7 +256,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
return NULL;
dir = pasid_table->table;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
dir_index = pasid >> PASID_PDE_SHIFT;
index = pasid & PASID_PTE_MASK;
@@ -235,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe)
WRITE_ONCE(pe->val[7], 0);
}
-static void intel_pasid_clear_entry(struct device *dev, int pasid)
+static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
+{
+ WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
+ WRITE_ONCE(pe->val[1], 0);
+ WRITE_ONCE(pe->val[2], 0);
+ WRITE_ONCE(pe->val[3], 0);
+ WRITE_ONCE(pe->val[4], 0);
+ WRITE_ONCE(pe->val[5], 0);
+ WRITE_ONCE(pe->val[6], 0);
+ WRITE_ONCE(pe->val[7], 0);
+}
+
+static void
+intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
{
struct pasid_entry *pe;
@@ -243,7 +313,10 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid)
if (WARN_ON(!pe))
return;
- pasid_clear_entry(pe);
+ if (fault_ignore && pasid_pte_is_present(pe))
+ pasid_clear_entry_with_fpd(pe);
+ else
+ pasid_clear_entry(pe);
}
static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
@@ -359,18 +432,29 @@ pasid_set_flpm(struct pasid_entry *pe, u64 value)
pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}
+/*
+ * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
+ * of a scalable mode PASID entry.
+ */
+static inline void
+pasid_set_eafe(struct pasid_entry *pe)
+{
+ pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
+}
+
static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
u16 did, int pasid)
{
struct qi_desc desc;
- desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+ desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
+ QI_PC_PASID(pasid) | QI_PC_TYPE;
desc.qw1 = 0;
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void
@@ -384,7 +468,7 @@ iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, iommu);
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void
@@ -394,7 +478,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
struct device_domain_info *info;
u16 sid, qdep, pfsid;
- info = dev->archdata.iommu;
+ info = get_domain_info(dev);
if (!info || !info->ats_enabled)
return;
@@ -405,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
-void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid)
+void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
+ int pasid, bool fault_ignore)
{
struct pasid_entry *pte;
u16 did;
@@ -416,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
return;
did = pasid_get_domain_id(pte);
- intel_pasid_clear_entry(dev, pasid);
+ intel_pasid_clear_entry(dev, pasid, fault_ignore);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
@@ -492,7 +576,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
/* Setup Present and PASID Granular Transfer Type: */
- pasid_set_translation_type(pte, 1);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
pasid_flush_caches(iommu, pte, pasid, did);
@@ -500,6 +584,25 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
}
/*
+ * Skip top levels of page tables for iommu which has less agaw
+ * than default. Unnecessary for PT mode.
+ */
+static inline int iommu_skip_agaw(struct dmar_domain *domain,
+ struct intel_iommu *iommu,
+ struct dma_pte **pgd)
+{
+ int agaw;
+
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ *pgd = phys_to_virt(dma_pte_addr(*pgd));
+ if (!dma_pte_present(*pgd))
+ return -EINVAL;
+ }
+
+ return agaw;
+}
+
+/*
* Set up the scalable mode pasid entry for second only translation type.
*/
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
@@ -522,17 +625,11 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
return -EINVAL;
}
- /*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
pgd = domain->pgd;
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd)) {
- dev_err(dev, "Invalid domain page table\n");
- return -EINVAL;
- }
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err(dev, "Invalid domain page table\n");
+ return -EINVAL;
}
pgd_val = virt_to_phys(pgd);
@@ -548,7 +645,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
pasid_set_domain_id(pte, did);
pasid_set_slptr(pte, pgd_val);
pasid_set_address_width(pte, agaw);
- pasid_set_translation_type(pte, 2);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -582,7 +679,7 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
pasid_set_address_width(pte, iommu->agaw);
- pasid_set_translation_type(pte, 4);
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
pasid_set_fault_enable(pte);
pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
@@ -596,3 +693,161 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
return 0;
}
+
+static int
+intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
+ struct iommu_gpasid_bind_data_vtd *pasid_data)
+{
+ /*
+ * Not all guest PASID table entry fields are passed down during bind,
+ * here we only set up the ones that are dependent on guest settings.
+ * Execution related bits such as NXE, SMEP are not supported.
+ * Other fields, such as snoop related, are set based on host needs
+ * regardless of guest settings.
+ */
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
+ if (!ecap_srs(iommu->ecap)) {
+ pr_err_ratelimited("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ pasid_set_sre(pte);
+ }
+
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
+ if (!ecap_eafs(iommu->ecap)) {
+ pr_err_ratelimited("No extended access flag support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ pasid_set_eafe(pte);
+ }
+
+ /*
+ * Memory type is only applicable to devices inside processor coherent
+ * domain. Will add MTS support once coherent devices are available.
+ */
+ if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
+ pr_warn_ratelimited("No memory type support %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
+ * This could be used for guest shared virtual address. In this case, the
+ * first level page tables are used for GVA-GPA translation in the guest,
+ * second level page tables are used for GPA-HPA translation.
+ *
+ * @iommu: IOMMU which the device belong to
+ * @dev: Device to be set up for translation
+ * @gpgd: FLPTPTR: First Level Page translation pointer in GPA
+ * @pasid: PASID to be programmed in the device PASID table
+ * @pasid_data: Additional PASID info from the guest bind request
+ * @domain: Domain info for setting up second level page tables
+ * @addr_width: Address width of the first level (guest)
+ */
+int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
+ pgd_t *gpgd, int pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width)
+{
+ struct pasid_entry *pte;
+ struct dma_pte *pgd;
+ int ret = 0;
+ u64 pgd_val;
+ int agaw;
+ u16 did;
+
+ if (!ecap_nest(iommu->ecap)) {
+ pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
+ if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
+ pr_err_ratelimited("Domain is not in nesting mode, %x\n",
+ domain->flags);
+ return -EINVAL;
+ }
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (WARN_ON(!pte))
+ return -EINVAL;
+
+ /*
+ * Caller must ensure PASID entry is not in use, i.e. not bind the
+ * same PASID to the same device twice.
+ */
+ if (pasid_pte_is_present(pte))
+ return -EBUSY;
+
+ pasid_clear_entry(pte);
+
+ /* Sanity checking performed by caller to make sure address
+ * width matching in two dimensions:
+ * 1. CPU vs. IOMMU
+ * 2. Guest vs. Host.
+ */
+ switch (addr_width) {
+#ifdef CONFIG_X86
+ case ADDR_WIDTH_5LEVEL:
+ if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
+ !cap_5lp_support(iommu->cap)) {
+ dev_err_ratelimited(dev,
+ "5-level paging not supported\n");
+ return -EINVAL;
+ }
+
+ pasid_set_flpm(pte, 1);
+ break;
+#endif
+ case ADDR_WIDTH_4LEVEL:
+ pasid_set_flpm(pte, 0);
+ break;
+ default:
+ dev_err_ratelimited(dev, "Invalid guest address width %d\n",
+ addr_width);
+ return -EINVAL;
+ }
+
+ /* First level PGD is in GPA, must be supported by the second level */
+ if ((uintptr_t)gpgd > domain->max_addr) {
+ dev_err_ratelimited(dev,
+ "Guest PGD %lx not supported, max %llx\n",
+ (uintptr_t)gpgd, domain->max_addr);
+ return -EINVAL;
+ }
+ pasid_set_flptr(pte, (uintptr_t)gpgd);
+
+ ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
+ if (ret)
+ return ret;
+
+ /* Setup the second level based on the given domain */
+ pgd = domain->pgd;
+
+ agaw = iommu_skip_agaw(domain, iommu, &pgd);
+ if (agaw < 0) {
+ dev_err_ratelimited(dev, "Invalid domain page table\n");
+ return -EINVAL;
+ }
+ pgd_val = virt_to_phys(pgd);
+ pasid_set_slptr(pte, pgd_val);
+ pasid_set_fault_enable(pte);
+
+ did = domain->iommu_did[iommu->seq_id];
+ pasid_set_domain_id(pte, did);
+
+ pasid_set_address_width(pte, agaw);
+ pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+
+ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
+ pasid_set_present(pte);
+ pasid_flush_caches(iommu, pte, pasid, did);
+
+ return ret;
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 92de6df24ccb..c5318d40e0fa 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -15,6 +15,7 @@
#define PASID_MAX 0x100000
#define PASID_PTE_MASK 0x3F
#define PASID_PTE_PRESENT 1
+#define PASID_PTE_FPD 2
#define PDE_PFN_MASK PAGE_MASK
#define PASID_PDE_SHIFT 6
#define MAX_NR_PASID_BITS 20
@@ -23,6 +24,16 @@
#define is_pasid_enabled(entry) (((entry)->lo >> 3) & 0x1)
#define get_pasid_dir_size(entry) (1 << ((((entry)->lo >> 9) & 0x7) + 7))
+/* Virtual command interface for enlightened pasid management. */
+#define VCMD_CMD_ALLOC 0x1
+#define VCMD_CMD_FREE 0x2
+#define VCMD_VRSP_IP 0x1
+#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3)
+#define VCMD_VRSP_SC_SUCCESS 0
+#define VCMD_VRSP_SC_NO_PASID_AVAIL 1
+#define VCMD_VRSP_SC_INVALID_PASID 1
+#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff)
+#define VCMD_CMD_OPERAND(e) ((e) << 8)
/*
* Domain ID reserved for pasid entries programmed for first-level
* only and pass-through transfer modes.
@@ -36,6 +47,7 @@
* to vmalloc or even module mappings.
*/
#define PASID_FLAG_SUPERVISOR_MODE BIT(0)
+#define PASID_FLAG_NESTED BIT(1)
/*
* The PASID_FLAG_FL5LP flag Indicates using 5-level paging for first-
@@ -51,6 +63,11 @@ struct pasid_entry {
u64 val[8];
};
+#define PASID_ENTRY_PGTT_FL_ONLY (1)
+#define PASID_ENTRY_PGTT_SL_ONLY (2)
+#define PASID_ENTRY_PGTT_NESTED (3)
+#define PASID_ENTRY_PGTT_PT (4)
+
/* The representative of a PASID table */
struct pasid_table {
void *table; /* pasid table pointer */
@@ -99,7 +116,13 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, int pasid);
+int intel_pasid_setup_nested(struct intel_iommu *iommu,
+ struct device *dev, pgd_t *pgd, int pasid,
+ struct iommu_gpasid_bind_data_vtd *pasid_data,
+ struct dmar_domain *domain, int addr_width);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
- struct device *dev, int pasid);
-
+ struct device *dev, int pasid,
+ bool fault_ignore);
+int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid);
+void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid);
#endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 2998418f0a38..a035ef911fba 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -23,6 +23,7 @@
#include "intel-pasid.h"
static irqreturn_t prq_event_thread(int irq, void *d);
+static void intel_svm_drain_prq(struct device *dev, int pasid);
#define PRQ_ORDER 0
@@ -66,6 +67,8 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
+ init_completion(&iommu->prq_complete);
+
return 0;
}
@@ -138,7 +141,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
}
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
+ qi_submit_sync(svm->iommu, &desc, 1, 0);
if (sdev->dev_iotlb) {
desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
@@ -162,7 +165,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
}
desc.qw2 = 0;
desc.qw3 = 0;
- qi_submit_sync(&desc, svm->iommu);
+ qi_submit_sync(svm->iommu, &desc, 1, 0);
}
}
@@ -206,10 +209,9 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* *has* to handle gracefully without affecting other processes.
*/
rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
- }
+ list_for_each_entry_rcu(sdev, &svm->devs, list)
+ intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
+ svm->pasid, true);
rcu_read_unlock();
}
@@ -226,13 +228,212 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
-int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
+int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
+ struct iommu_gpasid_bind_data *data)
+{
+ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct dmar_domain *dmar_domain;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = 0;
+
+ if (WARN_ON(!iommu) || !data)
+ return -EINVAL;
+
+ if (data->version != IOMMU_GPASID_BIND_VERSION_1 ||
+ data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
+ return -EINVAL;
+
+ if (!dev_is_pci(dev))
+ return -ENOTSUPP;
+
+ /* VT-d supports devices with full 20 bit PASIDs only */
+ if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
+ return -EINVAL;
+
+ /*
+ * We only check host PASID range, we have no knowledge to check
+ * guest PASID range.
+ */
+ if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
+ return -EINVAL;
+
+ dmar_domain = to_dmar_domain(domain);
+
+ mutex_lock(&pasid_mutex);
+ svm = ioasid_find(NULL, data->hpasid, NULL);
+ if (IS_ERR(svm)) {
+ ret = PTR_ERR(svm);
+ goto out;
+ }
+
+ if (svm) {
+ /*
+ * If we found svm for the PASID, there must be at
+ * least one device bond, otherwise svm should be freed.
+ */
+ if (WARN_ON(list_empty(&svm->devs))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for_each_svm_dev(sdev, svm, dev) {
+ /*
+ * For devices with aux domains, we should allow
+ * multiple bind calls with the same PASID and pdev.
+ */
+ if (iommu_dev_feature_enabled(dev,
+ IOMMU_DEV_FEAT_AUX)) {
+ sdev->users++;
+ } else {
+ dev_warn_ratelimited(dev,
+ "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
+ }
+ goto out;
+ }
+ } else {
+ /* We come here when PASID has never been bond to a device. */
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* REVISIT: upper layer/VFIO can track host process that bind
+ * the PASID. ioasid_set = mm might be sufficient for vfio to
+ * check pasid VMM ownership. We can drop the following line
+ * once VFIO and IOASID set check is in place.
+ */
+ svm->mm = get_task_mm(current);
+ svm->pasid = data->hpasid;
+ if (data->flags & IOMMU_SVA_GPASID_VAL) {
+ svm->gpasid = data->gpasid;
+ svm->flags |= SVM_FLAG_GUEST_PASID;
+ }
+ ioasid_set_data(data->hpasid, svm);
+ INIT_LIST_HEAD_RCU(&svm->devs);
+ mmput(svm->mm);
+ }
+ sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!sdev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ sdev->dev = dev;
+
+ /* Only count users if device has aux domains */
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+ sdev->users = 1;
+
+ /* Set up device context entry for PASID if not enabled already */
+ ret = intel_iommu_enable_pasid(iommu, sdev->dev);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to enable PASID capability\n");
+ kfree(sdev);
+ goto out;
+ }
+
+ /*
+ * PASID table is per device for better security. Therefore, for
+ * each bind of a new device even with an existing PASID, we need to
+ * call the nested mode setup function here.
+ */
+ spin_lock(&iommu->lock);
+ ret = intel_pasid_setup_nested(iommu, dev,
+ (pgd_t *)(uintptr_t)data->gpgd,
+ data->hpasid, &data->vtd, dmar_domain,
+ data->addr_width);
+ spin_unlock(&iommu->lock);
+ if (ret) {
+ dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
+ data->hpasid, ret);
+ /*
+ * PASID entry should be in cleared state if nested mode
+ * set up failed. So we only need to clear IOASID tracking
+ * data such that free call will succeed.
+ */
+ kfree(sdev);
+ goto out;
+ }
+
+ svm->flags |= SVM_FLAG_GUEST_MODE;
+
+ init_rcu_head(&sdev->rcu);
+ list_add_rcu(&sdev->list, &svm->devs);
+ out:
+ if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
+ ioasid_set_data(data->hpasid, NULL);
+ kfree(svm);
+ }
+
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
+
+int intel_svm_unbind_gpasid(struct device *dev, int pasid)
+{
+ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = -EINVAL;
+
+ if (WARN_ON(!iommu))
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+ svm = ioasid_find(NULL, pasid, NULL);
+ if (!svm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (IS_ERR(svm)) {
+ ret = PTR_ERR(svm);
+ goto out;
+ }
+
+ for_each_svm_dev(sdev, svm, dev) {
+ ret = 0;
+ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
+ sdev->users--;
+ if (!sdev->users) {
+ list_del_rcu(&sdev->list);
+ intel_pasid_tear_down_entry(iommu, dev,
+ svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
+ kfree_rcu(sdev, rcu);
+
+ if (list_empty(&svm->devs)) {
+ /*
+ * We do not free the IOASID here in that
+ * IOMMU driver did not allocate it.
+ * Unlike native SVM, IOASID for guest use was
+ * allocated prior to the bind call.
+ * In any case, if the free call comes before
+ * the unbind, IOMMU driver will get notified
+ * and perform cleanup.
+ */
+ ioasid_set_data(pasid, NULL);
+ kfree(svm);
+ }
+ }
+ break;
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}
+
+/* Caller must hold pasid_mutex, mm reference */
+static int
+intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
+ struct mm_struct *mm, struct intel_svm_dev **sd)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
- struct mm_struct *mm = NULL;
int pasid_max;
int ret;
@@ -249,16 +450,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
} else
pasid_max = 1 << 20;
+ /* Bind supervisor PASID shuld have mm = NULL */
if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap))
+ if (!ecap_srs(iommu->ecap) || mm) {
+ pr_err("Supervisor PASID with user provided mm.\n");
return -EINVAL;
- } else if (pasid) {
- mm = get_task_mm(current);
- BUG_ON(!mm);
+ }
}
- mutex_lock(&pasid_mutex);
- if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
struct intel_svm *t;
list_for_each_entry(t, &global_svm_list, list) {
@@ -296,19 +496,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
sdev->dev = dev;
ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret || !pasid) {
- /* If they don't actually want to assign a PASID, this is
- * just an enabling check/preparation. */
- kfree(sdev);
- goto out;
- }
-
- info = dev->archdata.iommu;
- if (!info || !info->pasid_supported) {
+ if (ret) {
kfree(sdev);
goto out;
}
+ info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
if (info->ats_enabled) {
@@ -397,26 +590,24 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
}
}
list_add_rcu(&sdev->list, &svm->devs);
-
- success:
- *pasid = svm->pasid;
+success:
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ if (sd)
+ *sd = sdev;
ret = 0;
out:
- mutex_unlock(&pasid_mutex);
- if (mm)
- mmput(mm);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
-int intel_svm_unbind_mm(struct device *dev, int pasid)
+/* Caller must hold pasid_mutex */
+static int intel_svm_unbind_mm(struct device *dev, int pasid)
{
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
int ret = -EINVAL;
- mutex_lock(&pasid_mutex);
iommu = intel_svm_device_to_iommu(dev);
if (!iommu)
goto out;
@@ -442,8 +633,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
* to use. We have a *shared* PASID table, because it's
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
- intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
- intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
+ intel_pasid_tear_down_entry(iommu, dev,
+ svm->pasid, false);
+ intel_svm_drain_prq(dev, svm->pasid);
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
@@ -462,45 +654,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
break;
}
out:
- mutex_unlock(&pasid_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
-
-int intel_svm_is_pasid_valid(struct device *dev, int pasid)
-{
- struct intel_iommu *iommu;
- struct intel_svm *svm;
- int ret = -EINVAL;
-
- mutex_lock(&pasid_mutex);
- iommu = intel_svm_device_to_iommu(dev);
- if (!iommu)
- goto out;
-
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm)
- goto out;
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
- goto out;
- }
- /* init_mm is used in this case */
- if (!svm->mm)
- ret = 1;
- else if (atomic_read(&svm->mm->mm_users) > 0)
- ret = 1;
- else
- ret = 0;
-
- out:
- mutex_unlock(&pasid_mutex);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
/* Page request queue descriptor */
struct page_req_dsc {
@@ -557,6 +713,93 @@ static bool is_canonical_address(u64 addr)
return (((saddr << shift) >> shift) == saddr);
}
+/**
+ * intel_svm_drain_prq - Drain page requests and responses for a pasid
+ * @dev: target device
+ * @pasid: pasid for draining
+ *
+ * Drain all pending page requests and responses related to @pasid in both
+ * software and hardware. This is supposed to be called after the device
+ * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
+ * and DevTLB have been invalidated.
+ *
+ * It waits until all pending page requests for @pasid in the page fault
+ * queue are completed by the prq handling thread. Then follow the steps
+ * described in VT-d spec CH7.10 to drain all page requests and page
+ * responses pending in the hardware.
+ */
+static void intel_svm_drain_prq(struct device *dev, int pasid)
+{
+ struct device_domain_info *info;
+ struct dmar_domain *domain;
+ struct intel_iommu *iommu;
+ struct qi_desc desc[3];
+ struct pci_dev *pdev;
+ int head, tail;
+ u16 sid, did;
+ int qdep;
+
+ info = get_domain_info(dev);
+ if (WARN_ON(!info || !dev_is_pci(dev)))
+ return;
+
+ if (!info->pri_enabled)
+ return;
+
+ iommu = info->iommu;
+ domain = info->domain;
+ pdev = to_pci_dev(dev);
+ sid = PCI_DEVID(info->bus, info->devfn);
+ did = domain->iommu_did[iommu->seq_id];
+ qdep = pci_ats_queue_depth(pdev);
+
+ /*
+ * Check and wait until all pending page requests in the queue are
+ * handled by the prq handling thread.
+ */
+prq_retry:
+ reinit_completion(&iommu->prq_complete);
+ tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
+ head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ while (head != tail) {
+ struct page_req_dsc *req;
+
+ req = &iommu->prq[head / sizeof(*req)];
+ if (!req->pasid_present || req->pasid != pasid) {
+ head = (head + sizeof(*req)) & PRQ_RING_MASK;
+ continue;
+ }
+
+ wait_for_completion(&iommu->prq_complete);
+ goto prq_retry;
+ }
+
+ /*
+ * Perform steps described in VT-d spec CH7.10 to drain page
+ * requests and responses in hardware.
+ */
+ memset(desc, 0, sizeof(desc));
+ desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
+ QI_IWD_FENCE |
+ QI_IWD_TYPE;
+ desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
+ QI_EIOTLB_DID(did) |
+ QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+ QI_EIOTLB_TYPE;
+ desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
+ QI_DEV_EIOTLB_SID(sid) |
+ QI_DEV_EIOTLB_QDEP(qdep) |
+ QI_DEIOTLB_TYPE |
+ QI_DEV_IOTLB_PFSID(info->pfsid);
+qi_retry:
+ reinit_completion(&iommu->prq_complete);
+ qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
+ wait_for_completion(&iommu->prq_complete);
+ goto qi_retry;
+ }
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
@@ -685,12 +928,75 @@ static irqreturn_t prq_event_thread(int irq, void *d)
sizeof(req->priv_data));
resp.qw2 = 0;
resp.qw3 = 0;
- qi_submit_sync(&resp, iommu);
+ qi_submit_sync(iommu, &resp, 1, 0);
}
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
+ /*
+ * Clear the page request overflow bit and wake up all threads that
+ * are waiting for the completion of this handling.
+ */
+ if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO)
+ writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
+
+ if (!completion_done(&iommu->prq_complete))
+ complete(&iommu->prq_complete);
+
return IRQ_RETVAL(handled);
}
+
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
+struct iommu_sva *
+intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+{
+ struct iommu_sva *sva = ERR_PTR(-EINVAL);
+ struct intel_svm_dev *sdev = NULL;
+ int flags = 0;
+ int ret;
+
+ /*
+ * TODO: Consolidate with generic iommu-sva bind after it is merged.
+ * It will require shared SVM data structures, i.e. combine io_mm
+ * and intel_svm etc.
+ */
+ if (drvdata)
+ flags = *(int *)drvdata;
+ mutex_lock(&pasid_mutex);
+ ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
+ if (ret)
+ sva = ERR_PTR(ret);
+ else if (sdev)
+ sva = &sdev->sva;
+ else
+ WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+
+ mutex_unlock(&pasid_mutex);
+
+ return sva;
+}
+
+void intel_svm_unbind(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ intel_svm_unbind_mm(sdev->dev, sdev->pasid);
+ mutex_unlock(&pasid_mutex);
+}
+
+int intel_svm_get_pasid(struct iommu_sva *sva)
+{
+ struct intel_svm_dev *sdev;
+ int pasid;
+
+ mutex_lock(&pasid_mutex);
+ sdev = to_intel_svm_dev(sva);
+ pasid = sdev->pasid;
+ mutex_unlock(&pasid_mutex);
+
+ return pasid;
+}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 81e43c1df7ec..a042f123b091 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -151,7 +151,7 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
desc.qw2 = 0;
desc.qw3 = 0;
- return qi_submit_sync(&desc, iommu);
+ return qi_submit_sync(iommu, &desc, 1, 0);
}
static int modify_irte(struct irq_2_iommu *irq_iommu,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a9e5618cde80..b5ea203f6c68 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -80,7 +80,8 @@ static bool iommu_cmd_line_dma_api(void)
return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API);
}
-static int iommu_alloc_default_domain(struct device *dev);
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
@@ -184,6 +185,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
static void dev_iommu_free(struct device *dev)
{
+ iommu_fwspec_free(dev);
kfree(dev->iommu);
dev->iommu = NULL;
}
@@ -195,6 +197,9 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
struct iommu_group *group;
int ret;
+ if (!ops)
+ return -ENODEV;
+
if (!dev_iommu_get(dev))
return -ENOMEM;
@@ -247,17 +252,17 @@ int iommu_probe_device(struct device *dev)
if (ret)
goto err_out;
+ group = iommu_group_get(dev);
+ if (!group)
+ goto err_release;
+
/*
* Try to allocate a default domain - needs support from the
* IOMMU driver. There are still some drivers which don't
* support default domains, so the return value is not yet
* checked.
*/
- iommu_alloc_default_domain(dev);
-
- group = iommu_group_get(dev);
- if (!group)
- goto err_release;
+ iommu_alloc_default_domain(group, dev);
if (group->default_domain)
ret = __iommu_attach_device(group->default_domain, dev);
@@ -582,7 +587,7 @@ struct iommu_group *iommu_group_alloc(void)
NULL, "%d", group->id);
if (ret) {
ida_simple_remove(&iommu_group_ida, group->id);
- kfree(group);
+ kobject_put(&group->kobj);
return ERR_PTR(ret);
}
@@ -765,6 +770,15 @@ out:
return ret;
}
+static bool iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
+{
+ if (domain->ops->is_attach_deferred)
+ return domain->ops->is_attach_deferred(domain, dev);
+
+ return false;
+}
+
/**
* iommu_group_add_device - add a device to an iommu group
* @group: the group into which to add the device (reference should be held)
@@ -817,7 +831,7 @@ rename:
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
- if (group->domain)
+ if (group->domain && !iommu_is_attach_deferred(group->domain, dev))
ret = __iommu_attach_device(group->domain, dev);
mutex_unlock(&group->mutex);
if (ret)
@@ -1474,15 +1488,11 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
return 0;
}
-static int iommu_alloc_default_domain(struct device *dev)
+static int iommu_alloc_default_domain(struct iommu_group *group,
+ struct device *dev)
{
- struct iommu_group *group;
unsigned int type;
- group = iommu_group_get(dev);
- if (!group)
- return -ENODEV;
-
if (group->default_domain)
return 0;
@@ -1670,17 +1680,8 @@ static void probe_alloc_default_domain(struct bus_type *bus,
static int iommu_group_do_dma_attach(struct device *dev, void *data)
{
struct iommu_domain *domain = data;
- const struct iommu_ops *ops;
- int ret;
-
- ret = __iommu_attach_device(domain, dev);
-
- ops = domain->ops;
- if (ret == 0 && ops->probe_finalize)
- ops->probe_finalize(dev);
-
- return ret;
+ return __iommu_attach_device(domain, dev);
}
static int __iommu_group_dma_attach(struct iommu_group *group)
@@ -1689,6 +1690,22 @@ static int __iommu_group_dma_attach(struct iommu_group *group)
iommu_group_do_dma_attach);
}
+static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+{
+ struct iommu_domain *domain = data;
+
+ if (domain->ops->probe_finalize)
+ domain->ops->probe_finalize(dev);
+
+ return 0;
+}
+
+static void __iommu_group_dma_finalize(struct iommu_group *group)
+{
+ __iommu_group_for_each_dev(group, group->default_domain,
+ iommu_group_do_probe_finalize);
+}
+
static int iommu_do_create_direct_mappings(struct device *dev, void *data)
{
struct iommu_group *group = data;
@@ -1741,6 +1758,8 @@ int bus_iommu_probe(struct bus_type *bus)
if (ret)
break;
+
+ __iommu_group_dma_finalize(group);
}
return ret;
@@ -1889,9 +1908,6 @@ static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
int ret;
- if ((domain->ops->is_attach_deferred != NULL) &&
- domain->ops->is_attach_deferred(domain, dev))
- return 0;
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
@@ -1963,8 +1979,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
static void __iommu_detach_device(struct iommu_domain *domain,
struct device *dev)
{
- if ((domain->ops->is_attach_deferred != NULL) &&
- domain->ops->is_attach_deferred(domain, dev))
+ if (iommu_is_attach_deferred(domain, dev))
return;
if (unlikely(domain->ops->detach_dev == NULL))
@@ -2532,71 +2547,6 @@ struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
}
EXPORT_SYMBOL_GPL(iommu_alloc_resv_region);
-static int
-request_default_domain_for_dev(struct device *dev, unsigned long type)
-{
- struct iommu_domain *domain;
- struct iommu_group *group;
- int ret;
-
- /* Device must already be in a group before calling this function */
- group = iommu_group_get(dev);
- if (!group)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
-
- ret = 0;
- if (group->default_domain && group->default_domain->type == type)
- goto out;
-
- /* Don't change mappings of existing devices */
- ret = -EBUSY;
- if (iommu_group_device_count(group) != 1)
- goto out;
-
- ret = -ENOMEM;
- domain = __iommu_domain_alloc(dev->bus, type);
- if (!domain)
- goto out;
-
- /* Attach the device to the domain */
- ret = __iommu_attach_group(domain, group);
- if (ret) {
- iommu_domain_free(domain);
- goto out;
- }
-
- /* Make the domain the default for this group */
- if (group->default_domain)
- iommu_domain_free(group->default_domain);
- group->default_domain = domain;
-
- iommu_create_device_direct_mappings(group, dev);
-
- dev_info(dev, "Using iommu %s mapping\n",
- type == IOMMU_DOMAIN_DMA ? "dma" : "direct");
-
- ret = 0;
-out:
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
- return ret;
-}
-
-/* Request that a device is direct mapped by the IOMMU */
-int iommu_request_dm_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_IDENTITY);
-}
-
-/* Request that a device can't be direct mapped by the IOMMU */
-int iommu_request_dma_domain_for_dev(struct device *dev)
-{
- return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
-}
-
void iommu_set_default_passthrough(bool cmd_line)
{
if (cmd_line)
@@ -2874,17 +2824,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
}
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
-int iommu_sva_set_ops(struct iommu_sva *handle,
- const struct iommu_sva_ops *sva_ops)
-{
- if (handle->ops && handle->ops != sva_ops)
- return -EEXIST;
-
- handle->ops = sva_ops;
- return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_sva_set_ops);
-
int iommu_sva_get_pasid(struct iommu_sva *handle)
{
const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0e6a9536eca6..49fc01f2a28d 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -253,7 +253,7 @@ int iova_cache_get(void)
SLAB_HWCACHE_ALIGN, NULL);
if (!iova_cache) {
mutex_unlock(&iova_cache_mutex);
- printk(KERN_ERR "Couldn't create iova cache\n");
+ pr_err("Couldn't create iova cache\n");
return -ENOMEM;
}
}
@@ -718,8 +718,8 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
if (!new_iova)
- printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
- iova->pfn_lo, iova->pfn_lo);
+ pr_err("Reserve iova range %lx@%lx failed\n",
+ iova->pfn_lo, iova->pfn_lo);
}
spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index fb7e702dee23..4c2972f3153b 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -903,11 +903,8 @@ static const struct iommu_ops ipmmu_ops = {
.probe_device = ipmmu_probe_device,
.release_device = ipmmu_release_device,
.probe_finalize = ipmmu_probe_finalize,
-#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
- .device_group = generic_device_group,
-#else
- .device_group = ipmmu_find_group,
-#endif
+ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
+ ? generic_device_group : ipmmu_find_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
};
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 10cd4db0710a..3d8a63555c25 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -34,7 +34,7 @@ __asm__ __volatile__ ( \
/* bitmap of the page sizes currently supported */
#define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
-DEFINE_SPINLOCK(msm_iommu_lock);
+static DEFINE_SPINLOCK(msm_iommu_lock);
static LIST_HEAD(qcom_iommu_devices);
static struct iommu_ops msm_iommu_ops;
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 7bdd74c7cb9f..c9d79cff4d17 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -265,10 +265,13 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+ struct dma_iommu_mapping *mtk_mapping;
int ret;
- if (!data)
- return -ENODEV;
+ /* Only allow the domain created internally. */
+ mtk_mapping = data->dev->archdata.iommu;
+ if (mtk_mapping->domain != domain)
+ return 0;
if (!data->m4u_dom) {
data->m4u_dom = dom;
@@ -288,9 +291,6 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
{
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
- if (!data)
- return;
-
mtk_iommu_config(data, dev, false);
}
@@ -416,6 +416,11 @@ static int mtk_iommu_create_mapping(struct device *dev,
return 0;
}
+static int mtk_iommu_def_domain_type(struct device *dev)
+{
+ return IOMMU_DOMAIN_UNMANAGED;
+}
+
static struct iommu_device *mtk_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -464,12 +469,10 @@ static void mtk_iommu_probe_finalize(struct device *dev)
static void mtk_iommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct mtk_iommu_data *data;
if (!fwspec || fwspec->ops != &mtk_iommu_ops)
return;
- data = dev_iommu_priv_get(dev);
iommu_fwspec_free(dev);
}
@@ -525,6 +528,7 @@ static const struct iommu_ops mtk_iommu_ops = {
.probe_device = mtk_iommu_probe_device,
.probe_finalize = mtk_iommu_probe_finalize,
.release_device = mtk_iommu_release_device,
+ .def_domain_type = mtk_iommu_def_domain_type,
.device_group = generic_device_group,
.pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT,
};
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 6699fe6d9e06..c8282cc212cb 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1236,6 +1236,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
goto out_group;
iommu_device_set_ops(&obj->iommu, &omap_iommu_ops);
+ iommu_device_set_fwnode(&obj->iommu, &of->fwnode);
err = iommu_device_register(&obj->iommu);
if (err)
@@ -1726,6 +1727,9 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev)
struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
struct iommu_group *group = ERR_PTR(-EINVAL);
+ if (!arch_data)
+ return ERR_PTR(-ENODEV);
+
if (arch_data->iommu_dev)
group = iommu_group_ref_get(arch_data->iommu_dev->group);
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 054e476ebd49..c3e1fbd1988c 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -814,8 +814,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
qcom_iommu->dev = dev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res)
+ if (res) {
qcom_iommu->local_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(qcom_iommu->local_base))
+ return PTR_ERR(qcom_iommu->local_base);
+ }
qcom_iommu->iface_clk = devm_clk_get(dev, "iface");
if (IS_ERR(qcom_iommu->iface_clk)) {
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
new file mode 100644
index 000000000000..fce605e96aa2
--- /dev/null
+++ b/drivers/iommu/sun50i-iommu.c
@@ -0,0 +1,1023 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2016-2018, Allwinner Technology CO., LTD.
+// Copyright (C) 2019-2020, Cerno
+
+#include <linux/bitfield.h>
+#include <linux/bug.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/ioport.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define IOMMU_RESET_REG 0x010
+#define IOMMU_ENABLE_REG 0x020
+#define IOMMU_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_BYPASS_REG 0x030
+#define IOMMU_AUTO_GATING_REG 0x040
+#define IOMMU_AUTO_GATING_ENABLE BIT(0)
+
+#define IOMMU_WBUF_CTRL_REG 0x044
+#define IOMMU_OOO_CTRL_REG 0x048
+#define IOMMU_4KB_BDY_PRT_CTRL_REG 0x04c
+#define IOMMU_TTB_REG 0x050
+#define IOMMU_TLB_ENABLE_REG 0x060
+#define IOMMU_TLB_PREFETCH_REG 0x070
+#define IOMMU_TLB_PREFETCH_MASTER_ENABLE(m) BIT(m)
+
+#define IOMMU_TLB_FLUSH_REG 0x080
+#define IOMMU_TLB_FLUSH_PTW_CACHE BIT(17)
+#define IOMMU_TLB_FLUSH_MACRO_TLB BIT(16)
+#define IOMMU_TLB_FLUSH_MICRO_TLB(i) (BIT(i) & GENMASK(5, 0))
+
+#define IOMMU_TLB_IVLD_ADDR_REG 0x090
+#define IOMMU_TLB_IVLD_ADDR_MASK_REG 0x094
+#define IOMMU_TLB_IVLD_ENABLE_REG 0x098
+#define IOMMU_TLB_IVLD_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_PC_IVLD_ADDR_REG 0x0a0
+#define IOMMU_PC_IVLD_ENABLE_REG 0x0a8
+#define IOMMU_PC_IVLD_ENABLE_ENABLE BIT(0)
+
+#define IOMMU_DM_AUT_CTRL_REG(d) (0x0b0 + ((d) / 2) * 4)
+#define IOMMU_DM_AUT_CTRL_RD_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2)))
+#define IOMMU_DM_AUT_CTRL_WR_UNAVAIL(d, m) (1 << (((d & 1) * 16) + ((m) * 2) + 1))
+
+#define IOMMU_DM_AUT_OVWT_REG 0x0d0
+#define IOMMU_INT_ENABLE_REG 0x100
+#define IOMMU_INT_CLR_REG 0x104
+#define IOMMU_INT_STA_REG 0x108
+#define IOMMU_INT_ERR_ADDR_REG(i) (0x110 + (i) * 4)
+#define IOMMU_INT_ERR_ADDR_L1_REG 0x130
+#define IOMMU_INT_ERR_ADDR_L2_REG 0x134
+#define IOMMU_INT_ERR_DATA_REG(i) (0x150 + (i) * 4)
+#define IOMMU_L1PG_INT_REG 0x0180
+#define IOMMU_L2PG_INT_REG 0x0184
+
+#define IOMMU_INT_INVALID_L2PG BIT(17)
+#define IOMMU_INT_INVALID_L1PG BIT(16)
+#define IOMMU_INT_MASTER_PERMISSION(m) BIT(m)
+#define IOMMU_INT_MASTER_MASK (IOMMU_INT_MASTER_PERMISSION(0) | \
+ IOMMU_INT_MASTER_PERMISSION(1) | \
+ IOMMU_INT_MASTER_PERMISSION(2) | \
+ IOMMU_INT_MASTER_PERMISSION(3) | \
+ IOMMU_INT_MASTER_PERMISSION(4) | \
+ IOMMU_INT_MASTER_PERMISSION(5))
+#define IOMMU_INT_MASK (IOMMU_INT_INVALID_L1PG | \
+ IOMMU_INT_INVALID_L2PG | \
+ IOMMU_INT_MASTER_MASK)
+
+#define PT_ENTRY_SIZE sizeof(u32)
+
+#define NUM_DT_ENTRIES 4096
+#define DT_SIZE (NUM_DT_ENTRIES * PT_ENTRY_SIZE)
+
+#define NUM_PT_ENTRIES 256
+#define PT_SIZE (NUM_PT_ENTRIES * PT_ENTRY_SIZE)
+
+struct sun50i_iommu {
+ struct iommu_device iommu;
+
+ /* Lock to modify the IOMMU registers */
+ spinlock_t iommu_lock;
+
+ struct device *dev;
+ void __iomem *base;
+ struct reset_control *reset;
+ struct clk *clk;
+
+ struct iommu_domain *domain;
+ struct iommu_group *group;
+ struct kmem_cache *pt_pool;
+};
+
+struct sun50i_iommu_domain {
+ struct iommu_domain domain;
+
+ /* Number of devices attached to the domain */
+ refcount_t refcnt;
+
+ /* L1 Page Table */
+ u32 *dt;
+ dma_addr_t dt_dma;
+
+ struct sun50i_iommu *iommu;
+};
+
+static struct sun50i_iommu_domain *to_sun50i_domain(struct iommu_domain *domain)
+{
+ return container_of(domain, struct sun50i_iommu_domain, domain);
+}
+
+static struct sun50i_iommu *sun50i_iommu_from_dev(struct device *dev)
+{
+ return dev_iommu_priv_get(dev);
+}
+
+static u32 iommu_read(struct sun50i_iommu *iommu, u32 offset)
+{
+ return readl(iommu->base + offset);
+}
+
+static void iommu_write(struct sun50i_iommu *iommu, u32 offset, u32 value)
+{
+ writel(value, iommu->base + offset);
+}
+
+/*
+ * The Allwinner H6 IOMMU uses a 2-level page table.
+ *
+ * The first level is the usual Directory Table (DT), that consists of
+ * 4096 4-bytes Directory Table Entries (DTE), each pointing to a Page
+ * Table (PT).
+ *
+ * Each PT consits of 256 4-bytes Page Table Entries (PTE), each
+ * pointing to a 4kB page of physical memory.
+ *
+ * The IOMMU supports a single DT, pointed by the IOMMU_TTB_REG
+ * register that contains its physical address.
+ */
+
+#define SUN50I_IOVA_DTE_MASK GENMASK(31, 20)
+#define SUN50I_IOVA_PTE_MASK GENMASK(19, 12)
+#define SUN50I_IOVA_PAGE_MASK GENMASK(11, 0)
+
+static u32 sun50i_iova_get_dte_index(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_DTE_MASK, iova);
+}
+
+static u32 sun50i_iova_get_pte_index(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_PTE_MASK, iova);
+}
+
+static u32 sun50i_iova_get_page_offset(dma_addr_t iova)
+{
+ return FIELD_GET(SUN50I_IOVA_PAGE_MASK, iova);
+}
+
+/*
+ * Each Directory Table Entry has a Page Table address and a valid
+ * bit:
+
+ * +---------------------+-----------+-+
+ * | PT address | Reserved |V|
+ * +---------------------+-----------+-+
+ * 31:10 - Page Table address
+ * 9:2 - Reserved
+ * 1:0 - 1 if the entry is valid
+ */
+
+#define SUN50I_DTE_PT_ADDRESS_MASK GENMASK(31, 10)
+#define SUN50I_DTE_PT_ATTRS GENMASK(1, 0)
+#define SUN50I_DTE_PT_VALID 1
+
+static phys_addr_t sun50i_dte_get_pt_address(u32 dte)
+{
+ return (phys_addr_t)dte & SUN50I_DTE_PT_ADDRESS_MASK;
+}
+
+static bool sun50i_dte_is_pt_valid(u32 dte)
+{
+ return (dte & SUN50I_DTE_PT_ATTRS) == SUN50I_DTE_PT_VALID;
+}
+
+static u32 sun50i_mk_dte(dma_addr_t pt_dma)
+{
+ return (pt_dma & SUN50I_DTE_PT_ADDRESS_MASK) | SUN50I_DTE_PT_VALID;
+}
+
+/*
+ * Each PTE has a Page address, an authority index and a valid bit:
+ *
+ * +----------------+-----+-----+-----+---+-----+
+ * | Page address | Rsv | ACI | Rsv | V | Rsv |
+ * +----------------+-----+-----+-----+---+-----+
+ * 31:12 - Page address
+ * 11:8 - Reserved
+ * 7:4 - Authority Control Index
+ * 3:2 - Reserved
+ * 1 - 1 if the entry is valid
+ * 0 - Reserved
+ *
+ * The way permissions work is that the IOMMU has 16 "domains" that
+ * can be configured to give each masters either read or write
+ * permissions through the IOMMU_DM_AUT_CTRL_REG registers. The domain
+ * 0 seems like the default domain, and its permissions in the
+ * IOMMU_DM_AUT_CTRL_REG are only read-only, so it's not really
+ * useful to enforce any particular permission.
+ *
+ * Each page entry will then have a reference to the domain they are
+ * affected to, so that we can actually enforce them on a per-page
+ * basis.
+ *
+ * In order to make it work with the IOMMU framework, we will be using
+ * 4 different domains, starting at 1: RD_WR, RD, WR and NONE
+ * depending on the permission we want to enforce. Each domain will
+ * have each master setup in the same way, since the IOMMU framework
+ * doesn't seem to restrict page access on a per-device basis. And
+ * then we will use the relevant domain index when generating the page
+ * table entry depending on the permissions we want to be enforced.
+ */
+
+enum sun50i_iommu_aci {
+ SUN50I_IOMMU_ACI_DO_NOT_USE = 0,
+ SUN50I_IOMMU_ACI_NONE,
+ SUN50I_IOMMU_ACI_RD,
+ SUN50I_IOMMU_ACI_WR,
+ SUN50I_IOMMU_ACI_RD_WR,
+};
+
+#define SUN50I_PTE_PAGE_ADDRESS_MASK GENMASK(31, 12)
+#define SUN50I_PTE_ACI_MASK GENMASK(7, 4)
+#define SUN50I_PTE_PAGE_VALID BIT(1)
+
+static phys_addr_t sun50i_pte_get_page_address(u32 pte)
+{
+ return (phys_addr_t)pte & SUN50I_PTE_PAGE_ADDRESS_MASK;
+}
+
+static enum sun50i_iommu_aci sun50i_get_pte_aci(u32 pte)
+{
+ return FIELD_GET(SUN50I_PTE_ACI_MASK, pte);
+}
+
+static bool sun50i_pte_is_page_valid(u32 pte)
+{
+ return pte & SUN50I_PTE_PAGE_VALID;
+}
+
+static u32 sun50i_mk_pte(phys_addr_t page, int prot)
+{
+ enum sun50i_iommu_aci aci;
+ u32 flags = 0;
+
+ if (prot & (IOMMU_READ | IOMMU_WRITE))
+ aci = SUN50I_IOMMU_ACI_RD_WR;
+ else if (prot & IOMMU_READ)
+ aci = SUN50I_IOMMU_ACI_RD;
+ else if (prot & IOMMU_WRITE)
+ aci = SUN50I_IOMMU_ACI_WR;
+ else
+ aci = SUN50I_IOMMU_ACI_NONE;
+
+ flags |= FIELD_PREP(SUN50I_PTE_ACI_MASK, aci);
+ page &= SUN50I_PTE_PAGE_ADDRESS_MASK;
+ return page | flags | SUN50I_PTE_PAGE_VALID;
+}
+
+static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain,
+ void *vaddr, unsigned int count)
+{
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ dma_addr_t dma = virt_to_phys(vaddr);
+ size_t size = count * PT_ENTRY_SIZE;
+
+ dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE);
+}
+
+static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu)
+{
+ u32 reg;
+ int ret;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ iommu_write(iommu,
+ IOMMU_TLB_FLUSH_REG,
+ IOMMU_TLB_FLUSH_PTW_CACHE |
+ IOMMU_TLB_FLUSH_MACRO_TLB |
+ IOMMU_TLB_FLUSH_MICRO_TLB(5) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(4) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(3) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(2) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(1) |
+ IOMMU_TLB_FLUSH_MICRO_TLB(0));
+
+ ret = readl_poll_timeout(iommu->base + IOMMU_TLB_FLUSH_REG,
+ reg, !reg,
+ 1, 2000);
+ if (ret)
+ dev_warn(iommu->dev, "TLB Flush timed out!\n");
+
+ return ret;
+}
+
+static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ unsigned long flags;
+
+ /*
+ * At boot, we'll have a first call into .flush_iotlb_all right after
+ * .probe_device, and since we link our (single) domain to our iommu in
+ * the .attach_device callback, we don't have that pointer set.
+ *
+ * It shouldn't really be any trouble to ignore it though since we flush
+ * all caches as part of the device powerup.
+ */
+ if (!iommu)
+ return;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+ sun50i_iommu_flush_all_tlb(iommu);
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+}
+
+static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
+{
+ sun50i_iommu_flush_iotlb_all(domain);
+}
+
+static int sun50i_iommu_enable(struct sun50i_iommu *iommu)
+{
+ struct sun50i_iommu_domain *sun50i_domain;
+ unsigned long flags;
+ int ret;
+
+ if (!iommu->domain)
+ return 0;
+
+ sun50i_domain = to_sun50i_domain(iommu->domain);
+
+ ret = reset_control_deassert(iommu->reset);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare_enable(iommu->clk);
+ if (ret)
+ goto err_reset_assert;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ iommu_write(iommu, IOMMU_TTB_REG, sun50i_domain->dt_dma);
+ iommu_write(iommu, IOMMU_TLB_PREFETCH_REG,
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(0) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(1) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(2) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) |
+ IOMMU_TLB_PREFETCH_MASTER_ENABLE(5));
+ iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK);
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 5));
+
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_RD),
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 0) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 1) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 2) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 3) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 4) |
+ IOMMU_DM_AUT_CTRL_WR_UNAVAIL(SUN50I_IOMMU_ACI_RD, 5));
+
+ iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_WR),
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 0) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 1) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 2) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 3) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 4) |
+ IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_WR, 5));
+
+ ret = sun50i_iommu_flush_all_tlb(iommu);
+ if (ret) {
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+ goto err_clk_disable;
+ }
+
+ iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE);
+ iommu_write(iommu, IOMMU_ENABLE_REG, IOMMU_ENABLE_ENABLE);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
+
+err_clk_disable:
+ clk_disable_unprepare(iommu->clk);
+
+err_reset_assert:
+ reset_control_assert(iommu->reset);
+
+ return ret;
+}
+
+static void sun50i_iommu_disable(struct sun50i_iommu *iommu)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&iommu->iommu_lock, flags);
+
+ iommu_write(iommu, IOMMU_ENABLE_REG, 0);
+ iommu_write(iommu, IOMMU_TTB_REG, 0);
+
+ spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ clk_disable_unprepare(iommu->clk);
+ reset_control_assert(iommu->reset);
+}
+
+static void *sun50i_iommu_alloc_page_table(struct sun50i_iommu *iommu,
+ gfp_t gfp)
+{
+ dma_addr_t pt_dma;
+ u32 *page_table;
+
+ page_table = kmem_cache_zalloc(iommu->pt_pool, gfp);
+ if (!page_table)
+ return ERR_PTR(-ENOMEM);
+
+ pt_dma = dma_map_single(iommu->dev, page_table, PT_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(iommu->dev, pt_dma)) {
+ dev_err(iommu->dev, "Couldn't map L2 Page Table\n");
+ kmem_cache_free(iommu->pt_pool, page_table);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* We rely on the physical address and DMA address being the same */
+ WARN_ON(pt_dma != virt_to_phys(page_table));
+
+ return page_table;
+}
+
+static void sun50i_iommu_free_page_table(struct sun50i_iommu *iommu,
+ u32 *page_table)
+{
+ phys_addr_t pt_phys = virt_to_phys(page_table);
+
+ dma_unmap_single(iommu->dev, pt_phys, PT_SIZE, DMA_TO_DEVICE);
+ kmem_cache_free(iommu->pt_pool, page_table);
+}
+
+static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
+ dma_addr_t iova, gfp_t gfp)
+{
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ u32 *page_table;
+ u32 *dte_addr;
+ u32 old_dte;
+ u32 dte;
+
+ dte_addr = &sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ dte = *dte_addr;
+ if (sun50i_dte_is_pt_valid(dte)) {
+ phys_addr_t pt_phys = sun50i_dte_get_pt_address(dte);
+ return (u32 *)phys_to_virt(pt_phys);
+ }
+
+ page_table = sun50i_iommu_alloc_page_table(iommu, gfp);
+ if (IS_ERR(page_table))
+ return page_table;
+
+ dte = sun50i_mk_dte(virt_to_phys(page_table));
+ old_dte = cmpxchg(dte_addr, 0, dte);
+ if (old_dte) {
+ phys_addr_t installed_pt_phys =
+ sun50i_dte_get_pt_address(old_dte);
+ u32 *installed_pt = phys_to_virt(installed_pt_phys);
+ u32 *drop_pt = page_table;
+
+ page_table = installed_pt;
+ dte = old_dte;
+ sun50i_iommu_free_page_table(iommu, drop_pt);
+ }
+
+ sun50i_table_flush(sun50i_domain, page_table, PT_SIZE);
+ sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+ return page_table;
+}
+
+static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = sun50i_domain->iommu;
+ u32 pte_index;
+ u32 *page_table, *pte_addr;
+ int ret = 0;
+
+ page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp);
+ if (IS_ERR(page_table)) {
+ ret = PTR_ERR(page_table);
+ goto out;
+ }
+
+ pte_index = sun50i_iova_get_pte_index(iova);
+ pte_addr = &page_table[pte_index];
+ if (unlikely(sun50i_pte_is_page_valid(*pte_addr))) {
+ phys_addr_t page_phys = sun50i_pte_get_page_address(*pte_addr);
+ dev_err(iommu->dev,
+ "iova %pad already mapped to %pa cannot remap to %pa prot: %#x\n",
+ &iova, &page_phys, &paddr, prot);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ *pte_addr = sun50i_mk_pte(paddr, prot);
+ sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+out:
+ return ret;
+}
+
+static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size, struct iommu_iotlb_gather *gather)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ phys_addr_t pt_phys;
+ dma_addr_t pte_dma;
+ u32 *pte_addr;
+ u32 dte;
+
+ dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ if (!sun50i_dte_is_pt_valid(dte))
+ return 0;
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ pte_addr = (u32 *)phys_to_virt(pt_phys) + sun50i_iova_get_pte_index(iova);
+ pte_dma = pt_phys + sun50i_iova_get_pte_index(iova) * PT_ENTRY_SIZE;
+
+ if (!sun50i_pte_is_page_valid(*pte_addr))
+ return 0;
+
+ memset(pte_addr, 0, sizeof(*pte_addr));
+ sun50i_table_flush(sun50i_domain, pte_addr, 1);
+
+ return SZ_4K;
+}
+
+static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ phys_addr_t pt_phys;
+ u32 *page_table;
+ u32 dte, pte;
+
+ dte = sun50i_domain->dt[sun50i_iova_get_dte_index(iova)];
+ if (!sun50i_dte_is_pt_valid(dte))
+ return 0;
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ page_table = (u32 *)phys_to_virt(pt_phys);
+ pte = page_table[sun50i_iova_get_pte_index(iova)];
+ if (!sun50i_pte_is_page_valid(pte))
+ return 0;
+
+ return sun50i_pte_get_page_address(pte) +
+ sun50i_iova_get_page_offset(iova);
+}
+
+static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+{
+ struct sun50i_iommu_domain *sun50i_domain;
+
+ if (type != IOMMU_DOMAIN_DMA &&
+ type != IOMMU_DOMAIN_IDENTITY &&
+ type != IOMMU_DOMAIN_UNMANAGED)
+ return NULL;
+
+ sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL);
+ if (!sun50i_domain)
+ return NULL;
+
+ if (type == IOMMU_DOMAIN_DMA &&
+ iommu_get_dma_cookie(&sun50i_domain->domain))
+ goto err_free_domain;
+
+ sun50i_domain->dt = (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(DT_SIZE));
+ if (!sun50i_domain->dt)
+ goto err_put_cookie;
+
+ refcount_set(&sun50i_domain->refcnt, 1);
+
+ sun50i_domain->domain.geometry.aperture_start = 0;
+ sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+ sun50i_domain->domain.geometry.force_aperture = true;
+
+ return &sun50i_domain->domain;
+
+err_put_cookie:
+ if (type == IOMMU_DOMAIN_DMA)
+ iommu_put_dma_cookie(&sun50i_domain->domain);
+
+err_free_domain:
+ kfree(sun50i_domain);
+
+ return NULL;
+}
+
+static void sun50i_iommu_domain_free(struct iommu_domain *domain)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+
+ free_pages((unsigned long)sun50i_domain->dt, get_order(DT_SIZE));
+ sun50i_domain->dt = NULL;
+
+ iommu_put_dma_cookie(domain);
+
+ kfree(sun50i_domain);
+}
+
+static int sun50i_iommu_attach_domain(struct sun50i_iommu *iommu,
+ struct sun50i_iommu_domain *sun50i_domain)
+{
+ iommu->domain = &sun50i_domain->domain;
+ sun50i_domain->iommu = iommu;
+
+ sun50i_domain->dt_dma = dma_map_single(iommu->dev, sun50i_domain->dt,
+ DT_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(iommu->dev, sun50i_domain->dt_dma)) {
+ dev_err(iommu->dev, "Couldn't map L1 Page Table\n");
+ return -ENOMEM;
+ }
+
+ return sun50i_iommu_enable(iommu);
+}
+
+static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
+ struct sun50i_iommu_domain *sun50i_domain)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_DT_ENTRIES; i++) {
+ phys_addr_t pt_phys;
+ u32 *page_table;
+ u32 *dte_addr;
+ u32 dte;
+
+ dte_addr = &sun50i_domain->dt[i];
+ dte = *dte_addr;
+ if (!sun50i_dte_is_pt_valid(dte))
+ continue;
+
+ memset(dte_addr, 0, sizeof(*dte_addr));
+ sun50i_table_flush(sun50i_domain, dte_addr, 1);
+
+ pt_phys = sun50i_dte_get_pt_address(dte);
+ page_table = phys_to_virt(pt_phys);
+ sun50i_iommu_free_page_table(iommu, page_table);
+ }
+
+
+ sun50i_iommu_disable(iommu);
+
+ dma_unmap_single(iommu->dev, virt_to_phys(sun50i_domain->dt),
+ DT_SIZE, DMA_TO_DEVICE);
+
+ iommu->domain = NULL;
+}
+
+static void sun50i_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+
+ dev_dbg(dev, "Detaching from IOMMU domain\n");
+
+ if (iommu->domain != domain)
+ return;
+
+ if (refcount_dec_and_test(&sun50i_domain->refcnt))
+ sun50i_iommu_detach_domain(iommu, sun50i_domain);
+}
+
+static int sun50i_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+ struct sun50i_iommu *iommu;
+
+ iommu = sun50i_iommu_from_dev(dev);
+ if (!iommu)
+ return -ENODEV;
+
+ dev_dbg(dev, "Attaching to IOMMU domain\n");
+
+ refcount_inc(&sun50i_domain->refcnt);
+
+ if (iommu->domain == domain)
+ return 0;
+
+ if (iommu->domain)
+ sun50i_iommu_detach_device(iommu->domain, dev);
+
+ sun50i_iommu_attach_domain(iommu, sun50i_domain);
+
+ return 0;
+}
+
+static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
+{
+ struct sun50i_iommu *iommu;
+
+ iommu = sun50i_iommu_from_dev(dev);
+ if (!iommu)
+ return ERR_PTR(-ENODEV);
+
+ return &iommu->iommu;
+}
+
+static void sun50i_iommu_release_device(struct device *dev) {}
+
+static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
+{
+ struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
+
+ return iommu_group_ref_get(iommu->group);
+}
+
+static int sun50i_iommu_of_xlate(struct device *dev,
+ struct of_phandle_args *args)
+{
+ struct platform_device *iommu_pdev = of_find_device_by_node(args->np);
+ unsigned id = args->args[0];
+
+ dev_iommu_priv_set(dev, platform_get_drvdata(iommu_pdev));
+
+ return iommu_fwspec_add_ids(dev, &id, 1);
+}
+
+static const struct iommu_ops sun50i_iommu_ops = {
+ .pgsize_bitmap = SZ_4K,
+ .attach_dev = sun50i_iommu_attach_device,
+ .detach_dev = sun50i_iommu_detach_device,
+ .device_group = sun50i_iommu_device_group,
+ .domain_alloc = sun50i_iommu_domain_alloc,
+ .domain_free = sun50i_iommu_domain_free,
+ .flush_iotlb_all = sun50i_iommu_flush_iotlb_all,
+ .iotlb_sync = sun50i_iommu_iotlb_sync,
+ .iova_to_phys = sun50i_iommu_iova_to_phys,
+ .map = sun50i_iommu_map,
+ .of_xlate = sun50i_iommu_of_xlate,
+ .probe_device = sun50i_iommu_probe_device,
+ .release_device = sun50i_iommu_release_device,
+ .unmap = sun50i_iommu_unmap,
+};
+
+static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu,
+ unsigned master, phys_addr_t iova,
+ unsigned prot)
+{
+ dev_err(iommu->dev, "Page fault for %pad (master %d, dir %s)\n",
+ &iova, master, (prot == IOMMU_FAULT_WRITE) ? "wr" : "rd");
+
+ if (iommu->domain)
+ report_iommu_fault(iommu->domain, iommu->dev, iova, prot);
+ else
+ dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n");
+}
+
+static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu,
+ unsigned addr_reg,
+ unsigned blame_reg)
+{
+ phys_addr_t iova;
+ unsigned master;
+ u32 blame;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ iova = iommu_read(iommu, addr_reg);
+ blame = iommu_read(iommu, blame_reg);
+ master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+
+ /*
+ * If the address is not in the page table, we can't get what
+ * operation triggered the fault. Assume it's a read
+ * operation.
+ */
+ sun50i_iommu_report_fault(iommu, master, iova, IOMMU_FAULT_READ);
+
+ return iova;
+}
+
+static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu)
+{
+ enum sun50i_iommu_aci aci;
+ phys_addr_t iova;
+ unsigned master;
+ unsigned dir;
+ u32 blame;
+
+ assert_spin_locked(&iommu->iommu_lock);
+
+ blame = iommu_read(iommu, IOMMU_INT_STA_REG);
+ master = ilog2(blame & IOMMU_INT_MASTER_MASK);
+ iova = iommu_read(iommu, IOMMU_INT_ERR_ADDR_REG(master));
+ aci = sun50i_get_pte_aci(iommu_read(iommu,
+ IOMMU_INT_ERR_DATA_REG(master)));
+
+ switch (aci) {
+ /*
+ * If we are in the read-only domain, then it means we
+ * tried to write.
+ */
+ case SUN50I_IOMMU_ACI_RD:
+ dir = IOMMU_FAULT_WRITE;
+ break;
+
+ /*
+ * If we are in the write-only domain, then it means
+ * we tried to read.
+ */
+ case SUN50I_IOMMU_ACI_WR:
+
+ /*
+ * If we are in the domain without any permission, we
+ * can't really tell. Let's default to a read
+ * operation.
+ */
+ case SUN50I_IOMMU_ACI_NONE:
+
+ /* WTF? */
+ case SUN50I_IOMMU_ACI_RD_WR:
+ default:
+ dir = IOMMU_FAULT_READ;
+ break;
+ }
+
+ /*
+ * If the address is not in the page table, we can't get what
+ * operation triggered the fault. Assume it's a read
+ * operation.
+ */
+ sun50i_iommu_report_fault(iommu, master, iova, dir);
+
+ return iova;
+}
+
+static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
+{
+ struct sun50i_iommu *iommu = dev_id;
+ phys_addr_t iova;
+ u32 status;
+
+ spin_lock(&iommu->iommu_lock);
+
+ status = iommu_read(iommu, IOMMU_INT_STA_REG);
+ if (!(status & IOMMU_INT_MASK)) {
+ spin_unlock(&iommu->iommu_lock);
+ return IRQ_NONE;
+ }
+
+ if (status & IOMMU_INT_INVALID_L2PG)
+ iova = sun50i_iommu_handle_pt_irq(iommu,
+ IOMMU_INT_ERR_ADDR_L2_REG,
+ IOMMU_L2PG_INT_REG);
+ else if (status & IOMMU_INT_INVALID_L1PG)
+ iova = sun50i_iommu_handle_pt_irq(iommu,
+ IOMMU_INT_ERR_ADDR_L1_REG,
+ IOMMU_L1PG_INT_REG);
+ else
+ iova = sun50i_iommu_handle_perm_irq(iommu);
+
+ iommu_write(iommu, IOMMU_INT_CLR_REG, status);
+
+ iommu_write(iommu, IOMMU_RESET_REG, ~status);
+ iommu_write(iommu, IOMMU_RESET_REG, status);
+
+ spin_unlock(&iommu->iommu_lock);
+
+ return IRQ_HANDLED;
+}
+
+static int sun50i_iommu_probe(struct platform_device *pdev)
+{
+ struct sun50i_iommu *iommu;
+ int ret, irq;
+
+ iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL);
+ if (!iommu)
+ return -ENOMEM;
+ spin_lock_init(&iommu->iommu_lock);
+ platform_set_drvdata(pdev, iommu);
+ iommu->dev = &pdev->dev;
+
+ iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev),
+ PT_SIZE, PT_SIZE,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!iommu->pt_pool)
+ return -ENOMEM;
+
+ iommu->group = iommu_group_alloc();
+ if (IS_ERR(iommu->group)) {
+ ret = PTR_ERR(iommu->group);
+ goto err_free_cache;
+ }
+
+ iommu->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(iommu->base)) {
+ ret = PTR_ERR(iommu->base);
+ goto err_free_group;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_group;
+ }
+
+ iommu->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->clk)) {
+ dev_err(&pdev->dev, "Couldn't get our clock.\n");
+ ret = PTR_ERR(iommu->clk);
+ goto err_free_group;
+ }
+
+ iommu->reset = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(iommu->reset)) {
+ dev_err(&pdev->dev, "Couldn't get our reset line.\n");
+ ret = PTR_ERR(iommu->reset);
+ goto err_free_group;
+ }
+
+ ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev,
+ NULL, dev_name(&pdev->dev));
+ if (ret)
+ goto err_free_group;
+
+ iommu_device_set_ops(&iommu->iommu, &sun50i_iommu_ops);
+ iommu_device_set_fwnode(&iommu->iommu, &pdev->dev.of_node->fwnode);
+
+ ret = iommu_device_register(&iommu->iommu);
+ if (ret)
+ goto err_remove_sysfs;
+
+ ret = devm_request_irq(&pdev->dev, irq, sun50i_iommu_irq, 0,
+ dev_name(&pdev->dev), iommu);
+ if (ret < 0)
+ goto err_unregister;
+
+ bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops);
+
+ return 0;
+
+err_unregister:
+ iommu_device_unregister(&iommu->iommu);
+
+err_remove_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
+
+err_free_group:
+ iommu_group_put(iommu->group);
+
+err_free_cache:
+ kmem_cache_destroy(iommu->pt_pool);
+
+ return ret;
+}
+
+static const struct of_device_id sun50i_iommu_dt[] = {
+ { .compatible = "allwinner,sun50i-h6-iommu", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, sun50i_iommu_dt);
+
+static struct platform_driver sun50i_iommu_driver = {
+ .driver = {
+ .name = "sun50i-iommu",
+ .of_match_table = sun50i_iommu_dt,
+ .suppress_bind_attrs = true,
+ }
+};
+builtin_platform_driver_probe(sun50i_iommu_driver, sun50i_iommu_probe);
+
+MODULE_DESCRIPTION("Allwinner H6 IOMMU driver");
+MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>");
+MODULE_AUTHOR("zhuxianbin <zhuxianbin@allwinnertech.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index bda300c2a438..f6f07489a9aa 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
if (!region)
return -ENOMEM;
- list_add(&vdev->resv_regions, &region->list);
+ list_add(&region->list, &vdev->resv_regions);
return 0;
}