diff options
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 13 | ||||
-rw-r--r-- | drivers/iommu/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 224 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 38 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 10 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_v2.c | 107 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/dmar.c | 12 | ||||
-rw-r--r-- | drivers/iommu/exynos-iommu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/fsl_pamu.c | 10 | ||||
-rw-r--r-- | drivers/iommu/fsl_pamu_domain.c | 91 | ||||
-rw-r--r-- | drivers/iommu/intel-iommu.c | 874 | ||||
-rw-r--r-- | drivers/iommu/intel_irq_remapping.c | 60 | ||||
-rw-r--r-- | drivers/iommu/iommu-sysfs.c | 134 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 201 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 2 | ||||
-rw-r--r-- | drivers/iommu/msm_iommu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu-debug.c | 114 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu.c | 15 | ||||
-rw-r--r-- | drivers/iommu/omap-iommu.h | 8 | ||||
-rw-r--r-- | drivers/iommu/omap-iovmm.c | 791 | ||||
-rw-r--r-- | drivers/iommu/pci.h | 29 | ||||
-rw-r--r-- | drivers/iommu/shmobile-iommu.c | 2 | ||||
-rw-r--r-- | drivers/iommu/tegra-gart.c | 2 | ||||
-rw-r--r-- | drivers/iommu/tegra-smmu.c | 2 |
25 files changed, 1028 insertions, 1719 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d260605e6d5f..dd5112265cc9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -76,7 +76,7 @@ config AMD_IOMMU_STATS config AMD_IOMMU_V2 tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU && PROFILING + depends on AMD_IOMMU select MMU_NOTIFIER ---help--- This option enables support for the AMD IOMMUv2 features of the IOMMU @@ -143,16 +143,12 @@ config OMAP_IOMMU depends on ARCH_OMAP2PLUS select IOMMU_API -config OMAP_IOVMM - tristate "OMAP IO Virtual Memory Manager Support" - depends on OMAP_IOMMU - config OMAP_IOMMU_DEBUG - tristate "Export OMAP IOMMU/IOVMM internals in DebugFS" - depends on OMAP_IOVMM && DEBUG_FS + tristate "Export OMAP IOMMU internals in DebugFS" + depends on OMAP_IOMMU && DEBUG_FS help Select this to see extensive information about - the internal state of OMAP IOMMU/IOVMM in debugfs. + the internal state of OMAP IOMMU in debugfs. Say N unless you know you need this. @@ -180,6 +176,7 @@ config EXYNOS_IOMMU bool "Exynos IOMMU Support" depends on ARCH_EXYNOS select IOMMU_API + select ARM_DMA_USE_IOMMU help Support for the IOMMU (System MMU) of Samsung Exynos application processor family. This enables H/W multimedia accelerators to see diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 8893bad048e0..16edef74b8ee 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o +obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o @@ -11,7 +12,6 @@ obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o -obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4aec6a29e316..18405314168b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -46,7 +46,6 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" -#include "pci.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -81,7 +80,7 @@ LIST_HEAD(hpet_map); */ static struct protection_domain *pt_domain; -static struct iommu_ops amd_iommu_ops; +static const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; @@ -133,9 +132,6 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); - if (dev_data->group) - iommu_group_put(dev_data->group); - kfree(dev_data); } @@ -264,167 +260,79 @@ static bool check_device(struct device *dev) return true; } -static struct pci_bus *find_hosted_bus(struct pci_bus *bus) -{ - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - return ERR_PTR(-ENODEV); - } - - return bus; -} - -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - -static struct pci_dev *get_isolation_root(struct pci_dev *pdev) -{ - struct pci_dev *dma_pdev = pdev; - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); - - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. - */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; - - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } - - /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. - */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = find_hosted_bus(dma_pdev->bus); - if (IS_ERR(bus)) - break; - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } - - return dma_pdev; -} - -static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev) +static int init_iommu_group(struct device *dev) { - struct iommu_group *group = iommu_group_get(&pdev->dev); - int ret; + struct iommu_group *group; - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); + group = iommu_group_get_for_dev(dev); - WARN_ON(&pdev->dev != dev); - } + if (IS_ERR(group)) + return PTR_ERR(group); - ret = iommu_group_add_device(group, dev); iommu_group_put(group); - return ret; + return 0; } -static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data, - struct device *dev) +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) { - if (!dev_data->group) { - struct iommu_group *group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - - dev_data->group = group; - } - - return iommu_group_add_device(dev_data->group, dev); + *(u16 *)data = alias; + return 0; } -static int init_iommu_group(struct device *dev) +static u16 get_alias(struct device *dev) { - struct iommu_dev_data *dev_data; - struct iommu_group *group; - struct pci_dev *dma_pdev; - int ret; - - group = iommu_group_get(dev); - if (group) { - iommu_group_put(group); - return 0; - } - - dev_data = find_dev_data(get_device_id(dev)); - if (!dev_data) - return -ENOMEM; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; - if (dev_data->alias_data) { - u16 alias; - struct pci_bus *bus; + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); - if (dev_data->alias_data->group) - goto use_group; + if (ivrs_alias == pci_alias) + return ivrs_alias; - /* - * If the alias device exists, it's effectively just a first - * level quirk for finding the DMA source. - */ - alias = amd_iommu_alias_table[dev_data->devid]; - dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - if (dma_pdev) { - dma_pdev = get_isolation_root(dma_pdev); - goto use_pdev; + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); } - /* - * If the alias is virtual, try to find a parent device - * and test whether the IOMMU group is actualy rooted above - * the alias. Be careful to also test the parent device if - * we think the alias is the root of the group. - */ - bus = pci_find_bus(0, alias >> 8); - if (!bus) - goto use_group; - - bus = find_hosted_bus(bus); - if (IS_ERR(bus) || !bus->self) - goto use_group; + return pci_alias; + } - dma_pdev = get_isolation_root(pci_dev_get(bus->self)); - if (dma_pdev != bus->self || (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) - goto use_pdev; + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); - pci_dev_put(dma_pdev); - goto use_group; + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); } - dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); -use_pdev: - ret = use_pdev_iommu_group(dma_pdev, dev); - pci_dev_put(dma_pdev); - return ret; -use_group: - return use_dev_data_iommu_group(dev_data->alias_data, dev); + return ivrs_alias; } static int iommu_init_device(struct device *dev) @@ -441,7 +349,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = get_alias(dev); + if (alias != dev_data->devid) { struct iommu_dev_data *alias_data; @@ -470,6 +379,9 @@ static int iommu_init_device(struct device *dev) dev->archdata.iommu = dev_data; + iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + return 0; } @@ -489,12 +401,22 @@ static void iommu_ignore_device(struct device *dev) static void iommu_uninit_device(struct device *dev) { + struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev)); + + if (!dev_data) + return; + + iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + iommu_group_remove_device(dev); + /* Unlink from alias, it may change if another device is re-plugged */ + dev_data->alias_data = NULL; + /* - * Nothing to do here - we keep dev_data around for unplugged devices - * and reuse it when the device is re-plugged - not doing so would - * introduce a ton of races. + * We keep dev_data around for unplugged devices and reuse it when the + * device is re-plugged - not doing so would introduce a ton of races. */ } @@ -3473,7 +3395,7 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops amd_iommu_ops = { +static const struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, .attach_dev = amd_iommu_attach_device, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 0e08545d7298..3783e0b44df6 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,6 +26,7 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> +#include <linux/iommu.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -1197,6 +1198,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) iommu->max_counters = (u8) ((val >> 7) & 0xf); } +static ssize_t amd_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%x\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); + +static ssize_t amd_iommu_show_features(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->features); +} +static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); + +static struct attribute *amd_iommu_attrs[] = { + &dev_attr_cap.attr, + &dev_attr_features.attr, + NULL, +}; + +static struct attribute_group amd_iommu_group = { + .name = "amd-iommu", + .attrs = amd_iommu_attrs, +}; + +static const struct attribute_group *amd_iommu_groups[] = { + &amd_iommu_group, + NULL, +}; static int iommu_init_pci(struct amd_iommu *iommu) { @@ -1297,6 +1331,10 @@ static int iommu_init_pci(struct amd_iommu *iommu) amd_iommu_erratum_746_workaround(iommu); + iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, + amd_iommu_groups, "ivhd%d", + iommu->index); + return pci_enable_device(iommu->dev); } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f1a5abf11acf..8e43b7cba133 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -390,12 +390,6 @@ struct amd_iommu_fault { }; -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) struct iommu_domain; @@ -432,7 +426,6 @@ struct iommu_dev_data { struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ - struct iommu_group *group; /* IOMMU group for virtual aliases */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -578,6 +571,9 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + /* IOMMU sysfs device */ + struct device *iommu_dev; + /* * We can't rely on the BIOS to restore all values on reinit, so we * need to stash them diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 499b4366a98d..5f578e850fc5 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -47,12 +47,13 @@ struct pasid_state { atomic_t count; /* Reference count */ unsigned mmu_notifier_count; /* Counting nested mmu_notifier calls */ - struct task_struct *task; /* Task bound to this PASID */ struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_otifier handle */ + struct mmu_notifier mn; /* mmu_notifier handle */ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ struct device_state *device_state; /* Link to our device_state */ int pasid; /* PASID index */ + bool invalid; /* Used during setup and + teardown of the pasid */ spinlock_t lock; /* Protect pri_queues and mmu_notifer_count */ wait_queue_head_t wq; /* To wait for count == 0 */ @@ -99,7 +100,6 @@ static struct workqueue_struct *iommu_wq; static u64 *empty_page_table; static void free_pasid_states(struct device_state *dev_state); -static void unbind_pasid(struct device_state *dev_state, int pasid); static u16 device_id(struct pci_dev *pdev) { @@ -297,37 +297,29 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state) schedule(); finish_wait(&pasid_state->wq, &wait); - mmput(pasid_state->mm); free_pasid_state(pasid_state); } -static void __unbind_pasid(struct pasid_state *pasid_state) +static void unbind_pasid(struct pasid_state *pasid_state) { struct iommu_domain *domain; domain = pasid_state->device_state->domain; + /* + * Mark pasid_state as invalid, no more faults will we added to the + * work queue after this is visible everywhere. + */ + pasid_state->invalid = true; + + /* Make sure this is visible */ + smp_wmb(); + + /* After this the device/pasid can't access the mm anymore */ amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - clear_pasid_state(pasid_state->device_state, pasid_state->pasid); /* Make sure no more pending faults are in the queue */ flush_workqueue(iommu_wq); - - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state(pasid_state); /* Reference taken in bind() function */ -} - -static void unbind_pasid(struct device_state *dev_state, int pasid) -{ - struct pasid_state *pasid_state; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - return; - - __unbind_pasid(pasid_state); - put_pasid_state_wait(pasid_state); /* Reference taken in this function */ } static void free_pasid_states_level1(struct pasid_state **tbl) @@ -373,6 +365,12 @@ static void free_pasid_states(struct device_state *dev_state) * unbind the PASID */ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); } if (dev_state->pasid_levels == 2) @@ -411,14 +409,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn, return 0; } -static void mn_change_pte(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address, - pte_t pte) -{ - __mn_flush_page(mn, address); -} - static void mn_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address) @@ -472,22 +462,23 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct pasid_state *pasid_state; struct device_state *dev_state; + bool run_inv_ctx_cb; might_sleep(); - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + run_inv_ctx_cb = !pasid_state->invalid; - if (pasid_state->device_state->inv_ctx_cb) + if (run_inv_ctx_cb && pasid_state->device_state->inv_ctx_cb) dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - unbind_pasid(dev_state, pasid_state->pasid); + unbind_pasid(pasid_state); } static struct mmu_notifier_ops iommu_mn = { .release = mn_release, .clear_flush_young = mn_clear_flush_young, - .change_pte = mn_change_pte, .invalidate_page = mn_invalidate_page, .invalidate_range_start = mn_invalidate_range_start, .invalidate_range_end = mn_invalidate_range_end, @@ -529,7 +520,7 @@ static void do_fault(struct work_struct *work) write = !!(fault->flags & PPR_FAULT_WRITE); down_read(&fault->state->mm->mmap_sem); - npages = get_user_pages(fault->state->task, fault->state->mm, + npages = get_user_pages(NULL, fault->state->mm, fault->address, 1, write, 0, &page, NULL); up_read(&fault->state->mm->mmap_sem); @@ -587,7 +578,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) goto out; pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL) { + if (pasid_state == NULL || pasid_state->invalid) { /* We know the device but not the PASID -> send INVALID */ amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, PPR_INVALID, tag); @@ -612,6 +603,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) fault->state = pasid_state; fault->tag = tag; fault->finish = finish; + fault->pasid = iommu_fault->pasid; fault->flags = iommu_fault->flags; INIT_WORK(&fault->work, do_fault); @@ -620,6 +612,10 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) ret = NOTIFY_OK; out_drop_state: + + if (ret != NOTIFY_OK && pasid_state) + put_pasid_state(pasid_state); + put_device_state(dev_state); out: @@ -635,6 +631,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, { struct pasid_state *pasid_state; struct device_state *dev_state; + struct mm_struct *mm; u16 devid; int ret; @@ -658,20 +655,23 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (pasid_state == NULL) goto out; + atomic_set(&pasid_state->count, 1); init_waitqueue_head(&pasid_state->wq); spin_lock_init(&pasid_state->lock); - pasid_state->task = task; - pasid_state->mm = get_task_mm(task); + mm = get_task_mm(task); + pasid_state->mm = mm; pasid_state->device_state = dev_state; pasid_state->pasid = pasid; + pasid_state->invalid = true; /* Mark as valid only if we are + done with setting up the pasid */ pasid_state->mn.ops = &iommu_mn; if (pasid_state->mm == NULL) goto out_free; - mmu_notifier_register(&pasid_state->mn, pasid_state->mm); + mmu_notifier_register(&pasid_state->mn, mm); ret = set_pasid_state(dev_state, pasid_state, pasid); if (ret) @@ -682,15 +682,26 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (ret) goto out_clear_state; + /* Now we are ready to handle faults */ + pasid_state->invalid = false; + + /* + * Drop the reference to the mm_struct here. We rely on the + * mmu_notifier release call-back to inform us when the mm + * is going away. + */ + mmput(mm); + return 0; out_clear_state: clear_pasid_state(dev_state, pasid); out_unregister: - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + mmu_notifier_unregister(&pasid_state->mn, mm); out_free: + mmput(mm); free_pasid_state(pasid_state); out: @@ -728,10 +739,22 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) */ put_pasid_state(pasid_state); - /* This will call the mn_release function and unbind the PASID */ + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + + /* + * Call mmu_notifier_unregister to drop our reference + * to pasid_state->mm + */ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ out: + /* Drop reference taken in this function */ + put_device_state(dev_state); + + /* Drop reference taken in amd_iommu_bind_pasid */ put_device_state(dev_state); } EXPORT_SYMBOL(amd_iommu_unbind_pasid); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f3f66416e252..ca18d6d42a9b 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1597,7 +1597,7 @@ static void arm_smmu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .domain_init = arm_smmu_domain_init, .domain_destroy = arm_smmu_domain_destroy, .attach_dev = arm_smmu_attach_dev, diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9a4f05e5b23f..4306885f48b1 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -38,6 +38,7 @@ #include <linux/tboot.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <linux/iommu.h> #include <asm/irq_remapping.h> #include <asm/iommu_table.h> @@ -980,6 +981,12 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; + + if (intel_iommu_enabled) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + return 0; err_unmap: @@ -991,6 +998,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) static void free_iommu(struct intel_iommu *iommu) { + iommu_device_destroy(iommu->iommu_dev); + if (iommu->irq) { free_irq(iommu->irq, iommu); irq_set_handler_data(iommu->irq, NULL); @@ -1339,9 +1348,6 @@ int dmar_enable_qi(struct intel_iommu *iommu) return -ENOMEM; } - qi->free_head = qi->free_tail = 0; - qi->free_cnt = QI_LENGTH; - raw_spin_lock_init(&qi->q_lock); __dmar_enable_qi(iommu); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 99054d2c040d..d037e87a1fe5 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1170,7 +1170,7 @@ static void exynos_iommu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static struct iommu_ops exynos_iommu_ops = { +static const struct iommu_ops exynos_iommu_ops = { .domain_init = exynos_iommu_domain_init, .domain_destroy = exynos_iommu_domain_destroy, .attach_dev = exynos_iommu_attach_device, diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index b99dd88e31b9..2b6ce9387af1 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -92,7 +92,7 @@ struct gen_pool *spaace_pool; * subwindow count per liodn. * */ -u32 pamu_get_max_subwin_cnt() +u32 pamu_get_max_subwin_cnt(void) { return max_subwindow_count; } @@ -170,10 +170,10 @@ int pamu_disable_liodn(int liodn) static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) { /* Bug if not a power of 2 */ - BUG_ON(!is_power_of_2(addrspace_size)); + BUG_ON((addrspace_size & (addrspace_size - 1))); /* window size is 2^(WSE+1) bytes */ - return __ffs(addrspace_size) - 1; + return fls64(addrspace_size) - 2; } /* Derive the PAACE window count encoding for the subwindow count */ @@ -351,7 +351,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, struct paace *ppaace; unsigned long fspi; - if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { + if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { pr_debug("window size too small or not a power of two %llx\n", win_size); return -EINVAL; } @@ -464,7 +464,7 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, return -ENOENT; } - if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { + if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { pr_debug("subwindow size out of range, or not a power of 2\n"); return -EINVAL; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 93072ba44b1d..61d1dafa242d 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -38,7 +38,6 @@ #include <sysdev/fsl_pci.h> #include "fsl_pamu_domain.h" -#include "pci.h" /* * Global spinlock that needs to be held while @@ -301,7 +300,7 @@ static int check_size(u64 size, dma_addr_t iova) * Size must be a power of two and at least be equal * to PAMU page size. */ - if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { + if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { pr_debug("%s: size too small or not a power of two\n", __func__); return -EINVAL; } @@ -335,11 +334,6 @@ static struct fsl_dma_domain *iommu_alloc_dma_domain(void) return domain; } -static inline struct device_domain_info *find_domain(struct device *dev) -{ - return dev->archdata.iommu_domain; -} - static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) { unsigned long flags; @@ -380,7 +374,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * Check here if the device is already attached to domain or not. * If the device is already attached to a domain detach it. */ - old_domain_info = find_domain(dev); + old_domain_info = dev->archdata.iommu_domain; if (old_domain_info && old_domain_info->domain != dma_domain) { spin_unlock_irqrestore(&device_domain_lock, flags); detach_device(dev, old_domain_info->domain); @@ -399,7 +393,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * the info for the first LIODN as all * LIODNs share the same domain */ - if (!old_domain_info) + if (!dev->archdata.iommu_domain) dev->archdata.iommu_domain = info; spin_unlock_irqrestore(&device_domain_lock, flags); @@ -892,8 +886,6 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, return ret; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - static struct iommu_group *get_device_iommu_group(struct device *dev) { struct iommu_group *group; @@ -950,75 +942,14 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) struct pci_controller *pci_ctl; bool pci_endpt_partioning; struct iommu_group *group = NULL; - struct pci_dev *bridge, *dma_pdev = NULL; pci_ctl = pci_bus_to_host(pdev->bus); pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ if (pci_endpt_partioning) { - bridge = pci_find_upstream_pcie_bridge(pdev); - if (bridge) { - if (pci_is_pcie(bridge)) - dma_pdev = pci_get_domain_bus_and_slot( - pci_domain_nr(pdev->bus), - bridge->subordinate->number, 0); - if (!dma_pdev) - dma_pdev = pci_dev_get(bridge); - } else - dma_pdev = pci_dev_get(pdev); - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); - - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. - */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; - - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } + group = iommu_group_get_for_dev(&pdev->dev); /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. - */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } - -root_bus: - group = get_device_iommu_group(&dma_pdev->dev); - pci_dev_put(dma_pdev); - /* * PCIe controller is not a paritionable entity * free the controller device iommu_group. */ @@ -1042,12 +973,15 @@ root_bus: group = get_shared_pci_device_group(pdev); } + if (!group) + group = ERR_PTR(-ENODEV); + return group; } static int fsl_pamu_add_device(struct device *dev) { - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-ENODEV); struct pci_dev *pdev; const u32 *prop; int ret, len; @@ -1070,7 +1004,7 @@ static int fsl_pamu_add_device(struct device *dev) group = get_device_iommu_group(dev); } - if (!group || IS_ERR(group)) + if (IS_ERR(group)) return PTR_ERR(group); ret = iommu_group_add_device(group, dev); @@ -1118,8 +1052,7 @@ static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, ((w_count > 1) ? w_count : 0)); if (!ret) { - if (dma_domain->win_arr) - kfree(dma_domain->win_arr); + kfree(dma_domain->win_arr); dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * w_count, GFP_ATOMIC); if (!dma_domain->win_arr) { @@ -1140,7 +1073,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -static struct iommu_ops fsl_pamu_ops = { +static const struct iommu_ops fsl_pamu_ops = { .domain_init = fsl_pamu_domain_init, .domain_destroy = fsl_pamu_domain_destroy, .attach_dev = fsl_pamu_attach_device, @@ -1157,7 +1090,7 @@ static struct iommu_ops fsl_pamu_ops = { .remove_device = fsl_pamu_remove_device, }; -int pamu_domain_init() +int pamu_domain_init(void) { int ret = 0; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 51b6b77dc3e5..d1f5caad04f9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -45,7 +45,6 @@ #include <asm/iommu.h> #include "irq_remapping.h" -#include "pci.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -304,7 +303,7 @@ static inline bool dma_pte_present(struct dma_pte *pte) static inline bool dma_pte_superpage(struct dma_pte *pte) { - return (pte->val & (1 << 7)); + return (pte->val & DMA_PTE_LARGE_PAGE); } static inline int first_pte_in_page(struct dma_pte *pte) @@ -321,16 +320,13 @@ static inline int first_pte_in_page(struct dma_pte *pte) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* devices under the same p2p bridge are owned in one domain */ -#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) - /* domain represents a virtual machine, more than one devices * across iommus may be owned in one domain, e.g. kvm guest. */ -#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) +#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) /* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2) +#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) /* define the limit of IOMMUs supported in each domain */ #ifdef CONFIG_X86 @@ -429,6 +425,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, struct device *dev); static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct device *dev); +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -static struct iommu_ops intel_iommu_ops; +static const struct iommu_ops intel_iommu_ops; static int __init intel_iommu_setup(char *str) { @@ -540,6 +538,24 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +static inline int domain_type_is_vm(struct dmar_domain *domain) +{ + return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; +} + +static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) +{ + return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | + DOMAIN_FLAG_STATIC_IDENTITY); +} + +static inline int domain_pfn_supported(struct dmar_domain *domain, + unsigned long pfn) +{ + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + + return !(addr_width < BITS_PER_LONG && pfn >> addr_width); +} static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { @@ -580,9 +596,7 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) int iommu_id; /* si_domain and vm domain should not get here. */ - BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); - BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY); - + BUG_ON(domain_type_is_vm_or_si(domain)); iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus); if (iommu_id < 0 || iommu_id >= g_num_of_iommus) return NULL; @@ -619,50 +633,56 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_unlock(); } -static void domain_update_iommu_snooping(struct dmar_domain *domain) +static int domain_update_iommu_snooping(struct intel_iommu *skip) { - int i; - - domain->iommu_snooping = 1; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int ret = 1; - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { - if (!ecap_sc_support(g_iommus[i]->ecap)) { - domain->iommu_snooping = 0; - break; + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (iommu != skip) { + if (!ecap_sc_support(iommu->ecap)) { + ret = 0; + break; + } } } + rcu_read_unlock(); + + return ret; } -static void domain_update_iommu_superpage(struct dmar_domain *domain) +static int domain_update_iommu_superpage(struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu = NULL; + struct intel_iommu *iommu; int mask = 0xf; if (!intel_iommu_superpage) { - domain->iommu_superpage = 0; - return; + return 0; } /* set iommu_superpage to the smallest common denominator */ rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - mask &= cap_super_page_val(iommu->cap); - if (!mask) { - break; + if (iommu != skip) { + mask &= cap_super_page_val(iommu->cap); + if (!mask) + break; } } rcu_read_unlock(); - domain->iommu_superpage = fls(mask); + return fls(mask); } /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); - domain_update_iommu_snooping(domain); - domain_update_iommu_superpage(domain); + domain->iommu_snooping = domain_update_iommu_snooping(NULL); + domain->iommu_superpage = domain_update_iommu_superpage(NULL); } static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) @@ -671,7 +691,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf struct intel_iommu *iommu; struct device *tmp; struct pci_dev *ptmp, *pdev = NULL; - u16 segment; + u16 segment = 0; int i; if (dev_is_pci(dev)) { @@ -816,14 +836,13 @@ out: static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, unsigned long pfn, int *target_level) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; BUG_ON(!domain->pgd); - if (addr_width < BITS_PER_LONG && pfn >> addr_width) + if (!domain_pfn_supported(domain, pfn)) /* Address beyond IOMMU's addressing capabilities. */ return NULL; @@ -849,13 +868,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (cmpxchg64(&pte->val, 0ULL, pteval)) { + if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); - } else { - dma_pte_addr(pte); + else domain_flush_cache(domain, pte, sizeof(*pte)); - } } if (level == 1) break; @@ -892,7 +909,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, break; } - if (pte->val & DMA_PTE_LARGE_PAGE) { + if (dma_pte_superpage(pte)) { *large_page = total; return pte; } @@ -908,12 +925,11 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned int large_page = 1; struct dma_pte *first_pte, *pte; - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ @@ -974,12 +990,12 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* We don't need lock here; nobody else touches the iova range */ dma_pte_free_level(domain, agaw_to_level(domain->agaw), domain->pgd, 0, start_pfn, last_pfn); @@ -1077,11 +1093,10 @@ struct page *domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct page *freelist = NULL; - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ @@ -1275,7 +1290,8 @@ iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) - if (info->bus == bus && info->devfn == devfn) { + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { found = 1; break; } @@ -1384,7 +1400,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } -static int iommu_enable_translation(struct intel_iommu *iommu) +static void iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; unsigned long flags; @@ -1398,10 +1414,9 @@ static int iommu_enable_translation(struct intel_iommu *iommu) readl, (sts & DMA_GSTS_TES), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - return 0; } -static int iommu_disable_translation(struct intel_iommu *iommu) +static void iommu_disable_translation(struct intel_iommu *iommu) { u32 sts; unsigned long flag; @@ -1415,7 +1430,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu) readl, (!(sts & DMA_GSTS_TES)), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); - return 0; } @@ -1462,8 +1476,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; - int i, count; - unsigned long flags; + int i; if ((iommu->domains) && (iommu->domain_ids)) { for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { @@ -1476,11 +1489,8 @@ static void free_dmar_iommu(struct intel_iommu *iommu) domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); - - spin_lock_irqsave(&domain->iommu_lock, flags); - count = --domain->iommu_count; - spin_unlock_irqrestore(&domain->iommu_lock, flags); - if (count == 0) + if (domain_detach_iommu(domain, iommu) == 0 && + !domain_type_is_vm(domain)) domain_exit(domain); } } @@ -1499,7 +1509,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); } -static struct dmar_domain *alloc_domain(bool vm) +static struct dmar_domain *alloc_domain(int flags) { /* domain id for virtual machine, it won't be set in context */ static atomic_t vm_domid = ATOMIC_INIT(0); @@ -1509,46 +1519,62 @@ static struct dmar_domain *alloc_domain(bool vm) if (!domain) return NULL; + memset(domain, 0, sizeof(*domain)); domain->nid = -1; - domain->iommu_count = 0; - memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); - domain->flags = 0; + domain->flags = flags; spin_lock_init(&domain->iommu_lock); INIT_LIST_HEAD(&domain->devices); - if (vm) { + if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE) domain->id = atomic_inc_return(&vm_domid); - domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - } return domain; } -static int iommu_attach_domain(struct dmar_domain *domain, - struct intel_iommu *iommu) +static int __iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) { int num; unsigned long ndomains; - unsigned long flags; ndomains = cap_ndoms(iommu->cap); - - spin_lock_irqsave(&iommu->lock, flags); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { - spin_unlock_irqrestore(&iommu->lock, flags); - printk(KERN_ERR "IOMMU: no free domain ids\n"); - return -ENOMEM; + if (num < ndomains) { + set_bit(num, iommu->domain_ids); + iommu->domains[num] = domain; + } else { + num = -ENOSPC; } - domain->id = num; - domain->iommu_count++; - set_bit(num, iommu->domain_ids); - set_bit(iommu->seq_id, domain->iommu_bmp); - iommu->domains[num] = domain; + return num; +} + +static int iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + num = __iommu_attach_domain(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); + if (num < 0) + pr_err("IOMMU: no free domain ids\n"); - return 0; + return num; +} + +static int iommu_attach_vm_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long ndomains; + + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) + if (iommu->domains[num] == domain) + return num; + + return __iommu_attach_domain(domain, iommu); } static void iommu_detach_domain(struct dmar_domain *domain, @@ -1558,17 +1584,53 @@ static void iommu_detach_domain(struct dmar_domain *domain, int num, ndomains; spin_lock_irqsave(&iommu->lock, flags); - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(num, iommu->domain_ids, ndomains) { - if (iommu->domains[num] == domain) { - clear_bit(num, iommu->domain_ids); - iommu->domains[num] = NULL; - break; + if (domain_type_is_vm_or_si(domain)) { + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == domain) { + clear_bit(num, iommu->domain_ids); + iommu->domains[num] = NULL; + break; + } } + } else { + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; } spin_unlock_irqrestore(&iommu->lock, flags); } +static void domain_attach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { + domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); +} + +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + int count = INT_MAX; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) { + count = --domain->iommu_count; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); + + return count; +} + static struct iova_domain reserved_iova_list; static struct lock_class_key reserved_rbtree_key; @@ -1706,9 +1768,7 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); for_each_active_iommu(iommu, drhd) - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - test_bit(iommu->seq_id, domain->iommu_bmp)) - iommu_detach_domain(domain, iommu); + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist); @@ -1723,8 +1783,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; unsigned long flags; struct dma_pte *pgd; - unsigned long num; - unsigned long ndomains; int id; int agaw; struct device_domain_info *info = NULL; @@ -1748,31 +1806,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, id = domain->id; pgd = domain->pgd; - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) { - int found = 0; - - /* find an available domain id for this device in iommu */ - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(num, iommu->domain_ids, ndomains) { - if (iommu->domains[num] == domain) { - id = num; - found = 1; - break; - } - } - - if (found == 0) { - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { + if (domain_type_is_vm_or_si(domain)) { + if (domain_type_is_vm(domain)) { + id = iommu_attach_vm_domain(domain, iommu); + if (id < 0) { spin_unlock_irqrestore(&iommu->lock, flags); - printk(KERN_ERR "IOMMU: no free domain ids\n"); + pr_err("IOMMU: no free domain ids\n"); return -EFAULT; } - - set_bit(num, iommu->domain_ids); - iommu->domains[num] = domain; - id = num; } /* Skip top levels of page tables for @@ -1824,72 +1865,68 @@ static int domain_context_mapping_one(struct dmar_domain *domain, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH); + iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH); } else { iommu_flush_write_buffer(iommu); } iommu_enable_dev_iotlb(info); spin_unlock_irqrestore(&iommu->lock, flags); - spin_lock_irqsave(&domain->iommu_lock, flags); - if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { - domain->iommu_count++; - if (domain->iommu_count == 1) - domain->nid = iommu->node; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags); + domain_attach_iommu(domain, iommu); + return 0; } +struct domain_context_mapping_data { + struct dmar_domain *domain; + struct intel_iommu *iommu; + int translation; +}; + +static int domain_context_mapping_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct domain_context_mapping_data *data = opaque; + + return domain_context_mapping_one(data->domain, data->iommu, + PCI_BUS_NUM(alias), alias & 0xff, + data->translation); +} + static int domain_context_mapping(struct dmar_domain *domain, struct device *dev, int translation) { - int ret; - struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; + struct domain_context_mapping_data data; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; - ret = domain_context_mapping_one(domain, iommu, bus, devfn, - translation); - if (ret || !dev_is_pci(dev)) - return ret; - - /* dependent device mapping */ - pdev = to_pci_dev(dev); - tmp = pci_find_upstream_pcie_bridge(pdev); - if (!tmp) - return 0; - /* Secondary interface's bus number and devfn 0 */ - parent = pdev->bus->self; - while (parent != tmp) { - ret = domain_context_mapping_one(domain, iommu, - parent->bus->number, - parent->devfn, translation); - if (ret) - return ret; - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - return domain_context_mapping_one(domain, iommu, - tmp->subordinate->number, 0, - translation); - else /* this is a legacy PCI bridge */ - return domain_context_mapping_one(domain, iommu, - tmp->bus->number, - tmp->devfn, + if (!dev_is_pci(dev)) + return domain_context_mapping_one(domain, iommu, bus, devfn, translation); + + data.domain = domain; + data.iommu = iommu; + data.translation = translation; + + return pci_for_each_dma_alias(to_pci_dev(dev), + &domain_context_mapping_cb, &data); +} + +static int domain_context_mapped_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); } static int domain_context_mapped(struct device *dev) { - int ret; - struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; @@ -1897,30 +1934,11 @@ static int domain_context_mapped(struct device *dev) if (!iommu) return -ENODEV; - ret = device_context_mapped(iommu, bus, devfn); - if (!ret || !dev_is_pci(dev)) - return ret; + if (!dev_is_pci(dev)) + return device_context_mapped(iommu, bus, devfn); - /* dependent device mapping */ - pdev = to_pci_dev(dev); - tmp = pci_find_upstream_pcie_bridge(pdev); - if (!tmp) - return ret; - /* Secondary interface's bus number and devfn 0 */ - parent = pdev->bus->self; - while (parent != tmp) { - ret = device_context_mapped(iommu, parent->bus->number, - parent->devfn); - if (!ret) - return ret; - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) - return device_context_mapped(iommu, tmp->subordinate->number, - 0); - else - return device_context_mapped(iommu, tmp->bus->number, - tmp->devfn); + return !pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapped_cb, iommu); } /* Returns a number of VTD pages, but aligned to MM page size */ @@ -1965,12 +1983,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, { struct dma_pte *first_pte = NULL, *pte = NULL; phys_addr_t uninitialized_var(pteval); - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; - BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); + BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; @@ -2004,12 +2021,14 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* It is large page*/ if (largepage_lvl > 1) { pteval |= DMA_PTE_LARGE_PAGE; - /* Ensure that old small page tables are removed to make room - for superpage, if they exist. */ - dma_pte_clear_range(domain, iov_pfn, - iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + lvl_pages = lvl_to_nr_pages(largepage_lvl); + /* + * Ensure that old small page tables are + * removed to make room for superpage, + * if they exist. + */ dma_pte_free_pagetable(domain, iov_pfn, - iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + iov_pfn + lvl_pages - 1); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -2102,31 +2121,20 @@ static inline void unlink_domain_info(struct device_domain_info *info) static void domain_remove_dev_info(struct dmar_domain *domain) { - struct device_domain_info *info; - unsigned long flags, flags2; + struct device_domain_info *info, *tmp; + unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - while (!list_empty(&domain->devices)) { - info = list_entry(domain->devices.next, - struct device_domain_info, link); + list_for_each_entry_safe(info, tmp, &domain->devices, link) { unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); iommu_detach_dev(info->iommu, info->bus, info->devfn); - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + if (domain_type_is_vm(domain)) { iommu_detach_dependent_devices(info->iommu, info->dev); - /* clear this iommu in iommu_bmp, update iommu count - * and capabilities - */ - spin_lock_irqsave(&domain->iommu_lock, flags2); - if (test_and_clear_bit(info->iommu->seq_id, - domain->iommu_bmp)) { - domain->iommu_count--; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags2); + domain_detach_iommu(domain, info->iommu); } free_devinfo_mem(info); @@ -2181,8 +2189,6 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, info->dev = dev; info->domain = domain; info->iommu = iommu; - if (!dev) - domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; spin_lock_irqsave(&device_domain_lock, flags); if (dev) @@ -2209,79 +2215,86 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, return domain; } +static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) +{ + *(u16 *)opaque = alias; + return 0; +} + /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) { - struct dmar_domain *domain, *free = NULL; - struct intel_iommu *iommu = NULL; + struct dmar_domain *domain, *tmp; + struct intel_iommu *iommu; struct device_domain_info *info; - struct pci_dev *dev_tmp = NULL; + u16 dma_alias; unsigned long flags; - u8 bus, devfn, bridge_bus, bridge_devfn; + u8 bus, devfn; domain = find_domain(dev); if (domain) return domain; + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return NULL; + if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - u16 segment; - segment = pci_domain_nr(pdev->bus); - dev_tmp = pci_find_upstream_pcie_bridge(pdev); - if (dev_tmp) { - if (pci_is_pcie(dev_tmp)) { - bridge_bus = dev_tmp->subordinate->number; - bridge_devfn = 0; - } else { - bridge_bus = dev_tmp->bus->number; - bridge_devfn = dev_tmp->devfn; - } - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(segment, - bridge_bus, - bridge_devfn); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - /* pcie-pci bridge already has a domain, uses it */ - if (info) - goto found_domain; + pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); + + spin_lock_irqsave(&device_domain_lock, flags); + info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), + PCI_BUS_NUM(dma_alias), + dma_alias & 0xff); + if (info) { + iommu = info->iommu; + domain = info->domain; } - } + spin_unlock_irqrestore(&device_domain_lock, flags); - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - goto error; + /* DMA alias already has a domain, uses it */ + if (info) + goto found_domain; + } /* Allocate and initialize new domain for the device */ - domain = alloc_domain(false); + domain = alloc_domain(0); if (!domain) - goto error; - if (iommu_attach_domain(domain, iommu)) { + return NULL; + domain->id = iommu_attach_domain(domain, iommu); + if (domain->id < 0) { free_domain_mem(domain); - domain = NULL; - goto error; + return NULL; } - free = domain; - if (domain_init(domain, gaw)) - goto error; + domain_attach_iommu(domain, iommu); + if (domain_init(domain, gaw)) { + domain_exit(domain); + return NULL; + } + + /* register PCI DMA alias device */ + if (dev_is_pci(dev)) { + tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias), + dma_alias & 0xff, NULL, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } - /* register pcie-to-pci device */ - if (dev_tmp) { - domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, - NULL, domain); if (!domain) - goto error; + return NULL; } found_domain: - domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); -error: - if (free != domain) - domain_exit(free); + tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } return domain; } @@ -2405,6 +2418,7 @@ static inline void iommu_prepare_isa(void) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " "floppy might not work\n"); + pci_dev_put(pdev); } #else static inline void iommu_prepare_isa(void) @@ -2420,19 +2434,25 @@ static int __init si_domain_init(int hw) struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; int nid, ret = 0; + bool first = true; - si_domain = alloc_domain(false); + si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) return -EFAULT; - si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; - for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); - if (ret) { + if (ret < 0) { + domain_exit(si_domain); + return -EFAULT; + } else if (first) { + si_domain->id = ret; + first = false; + } else if (si_domain->id != ret) { domain_exit(si_domain); return -EFAULT; } + domain_attach_iommu(si_domain, iommu); } if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { @@ -2523,22 +2543,46 @@ static bool device_has_rmrr(struct device *dev) return false; } +/* + * There are a couple cases where we need to restrict the functionality of + * devices associated with RMRRs. The first is when evaluating a device for + * identity mapping because problems exist when devices are moved in and out + * of domains and their respective RMRR information is lost. This means that + * a device with associated RMRRs will never be in a "passthrough" domain. + * The second is use of the device through the IOMMU API. This interface + * expects to have full control of the IOVA space for the device. We cannot + * satisfy both the requirement that RMRR access is maintained and have an + * unencumbered IOVA space. We also have no ability to quiesce the device's + * use of the RMRR space or even inform the IOMMU API user of the restriction. + * We therefore prevent devices associated with an RMRR from participating in + * the IOMMU API, which eliminates them from device assignment. + * + * In both cases we assume that PCI USB devices with RMRRs have them largely + * for historical reasons and that the RMRR space is not actively used post + * boot. This exclusion may change if vendors begin to abuse it. + */ +static bool device_is_rmrr_locked(struct device *dev) +{ + if (!device_has_rmrr(dev)) + return false; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) + return false; + } + + return true; +} + static int iommu_should_identity_map(struct device *dev, int startup) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - /* - * We want to prevent any device associated with an RMRR from - * getting placed into the SI Domain. This is done because - * problems exist when devices are moved in and out of domains - * and their respective RMRR info is lost. We exempt USB devices - * from this process due to their usage of RMRRs that are known - * to not be needed after BIOS hand-off to OS. - */ - if (device_has_rmrr(dev) && - (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + if (device_is_rmrr_locked(dev)) return 0; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) @@ -2850,11 +2894,7 @@ static int __init init_dmars(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - - ret = iommu_enable_translation(iommu); - if (ret) - goto free_iommu; - + iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3091,10 +3131,10 @@ static void flush_unmaps(void) /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain->id, - iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !deferred_flush[i].freelist[j], 0); else { - mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); + mask = ilog2(mm_to_dma_pfn(iova_size(iova))); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); } @@ -3144,9 +3184,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *f spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) +static void intel_unmap(struct device *dev, dma_addr_t dev_addr) { struct dmar_domain *domain; unsigned long start_pfn, last_pfn; @@ -3190,6 +3228,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } +static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + intel_unmap(dev, dev_addr); +} + static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) @@ -3246,7 +3291,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + intel_unmap(dev, dma_handle); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, order); } @@ -3255,43 +3300,7 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dmar_domain *domain; - unsigned long start_pfn, last_pfn; - struct iova *iova; - struct intel_iommu *iommu; - struct page *freelist; - - if (iommu_no_mapping(dev)) - return; - - domain = find_domain(dev); - BUG_ON(!domain); - - iommu = domain_get_iommu(domain); - - iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); - if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", - (unsigned long long)sglist[0].dma_address)) - return; - - start_pfn = mm_to_dma_pfn(iova->pfn_lo); - last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - - freelist = domain_unmap(domain, start_pfn, last_pfn); - - if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, !freelist, 0); - /* free iova */ - __free_iova(&domain->iovad, iova); - dma_free_pagelist(freelist); - } else { - add_unmap(domain, iova, freelist); - /* - * queue up the release of the unmap to save the 1/6th of the - * cpu used up by the iotlb flush operation... - */ - } + intel_unmap(dev, sglist[0].dma_address); } static int intel_nontranslate_map_sg(struct device *hddev, @@ -3355,13 +3364,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { - /* clear the page */ - dma_pte_clear_range(domain, start_vpfn, - start_vpfn + size - 1); - /* free page tables */ dma_pte_free_pagetable(domain, start_vpfn, start_vpfn + size - 1); - /* free iova */ __free_iova(&domain->iovad, iova); return 0; } @@ -3568,10 +3572,8 @@ static int init_iommu_hw(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - if (iommu_enable_translation(iommu)) - return 1; + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3873,9 +3875,7 @@ static int device_notifier(struct notifier_block *nb, down_read(&dmar_global_lock); domain_remove_one_dev_info(domain, dev); - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) + if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) domain_exit(domain); up_read(&dmar_global_lock); @@ -3935,8 +3935,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain->id, - iova->pfn_lo, - iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); @@ -3955,6 +3954,63 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; + +static ssize_t intel_iommu_show_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + u32 ver = readl(iommu->reg + DMAR_VER_REG); + return sprintf(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); +} +static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); + +static ssize_t intel_iommu_show_address(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->reg_phys); +} +static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); + +static ssize_t intel_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); + +static ssize_t intel_iommu_show_ecap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->ecap); +} +static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); + +static struct attribute *intel_iommu_attrs[] = { + &dev_attr_version.attr, + &dev_attr_address.attr, + &dev_attr_cap.attr, + &dev_attr_ecap.attr, + NULL, +}; + +static struct attribute_group intel_iommu_group = { + .name = "intel-iommu", + .attrs = intel_iommu_attrs, +}; + +const struct attribute_group *intel_iommu_groups[] = { + &intel_iommu_group, + NULL, +}; + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4026,6 +4082,11 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); + for_each_active_iommu(iommu, drhd) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) @@ -4044,33 +4105,27 @@ out_free_dmar: return ret; } +static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff); + return 0; +} + +/* + * NB - intel-iommu lacks any sort of reference counting for the users of + * dependent devices. If multiple endpoints have intersecting dependent + * devices, unbinding the driver from any one of them will possibly leave + * the others unable to operate. + */ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct device *dev) { - struct pci_dev *tmp, *parent, *pdev; - if (!iommu || !dev || !dev_is_pci(dev)) return; - pdev = to_pci_dev(dev); - - /* dependent device detach */ - tmp = pci_find_upstream_pcie_bridge(pdev); - /* Secondary interface's bus number and devfn 0 */ - if (tmp) { - parent = pdev->bus->self; - while (parent != tmp) { - iommu_detach_dev(iommu, parent->bus->number, - parent->devfn); - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - iommu_detach_dev(iommu, - tmp->subordinate->number, 0); - else /* this is a legacy PCI bridge */ - iommu_detach_dev(iommu, tmp->bus->number, - tmp->devfn); - } + pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu); } static void domain_remove_one_dev_info(struct dmar_domain *domain, @@ -4117,20 +4172,9 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (found == 0) { - unsigned long tmp_flags; - spin_lock_irqsave(&domain->iommu_lock, tmp_flags); - clear_bit(iommu->seq_id, domain->iommu_bmp); - domain->iommu_count--; - domain_update_iommu_cap(domain); - spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); - } + domain_detach_iommu(domain, iommu); + if (!domain_type_is_vm_or_si(domain)) + iommu_detach_domain(domain, iommu); } } @@ -4150,7 +4194,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_superpage = 0; domain->max_addr = 0; - domain->nid = -1; /* always allocate the top pgd */ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); @@ -4164,7 +4207,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) { struct dmar_domain *dmar_domain; - dmar_domain = alloc_domain(true); + dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); if (!dmar_domain) { printk(KERN_ERR "intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -4202,14 +4245,18 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, int addr_width; u8 bus, devfn; + if (device_is_rmrr_locked(dev)) { + dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n"); + return -EPERM; + } + /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; old_domain = find_domain(dev); if (old_domain) { - if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) + if (domain_type_is_vm_or_si(dmar_domain)) domain_remove_one_dev_info(old_domain, dev); else domain_remove_dev_info(old_domain); @@ -4373,99 +4420,42 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - static int intel_iommu_add_device(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev *bridge, *dma_pdev = NULL; + struct intel_iommu *iommu; struct iommu_group *group; - int ret; u8 bus, devfn; - if (!device_to_iommu(dev, &bus, &devfn)) + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) return -ENODEV; - bridge = pci_find_upstream_pcie_bridge(pdev); - if (bridge) { - if (pci_is_pcie(bridge)) - dma_pdev = pci_get_domain_bus_and_slot( - pci_domain_nr(pdev->bus), - bridge->subordinate->number, 0); - if (!dma_pdev) - dma_pdev = pci_dev_get(bridge); - } else - dma_pdev = pci_dev_get(pdev); - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + iommu_device_link(iommu->iommu_dev, dev); - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. - */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; + group = iommu_group_get_for_dev(dev); - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } - - /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. - */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } - -root_bus: - group = iommu_group_get(&dma_pdev->dev); - pci_dev_put(dma_pdev); - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - } - - ret = iommu_group_add_device(group, dev); + if (IS_ERR(group)) + return PTR_ERR(group); iommu_group_put(group); - return ret; + return 0; } static void intel_iommu_remove_device(struct device *dev) { + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return; + iommu_group_remove_device(dev); + + iommu_device_unlink(iommu->iommu_dev, dev); } -static struct iommu_ops intel_iommu_ops = { +static const struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, .attach_dev = intel_iommu_attach_device, diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9b174893f0f5..0df41f6264f5 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -70,6 +70,11 @@ static int get_irte(int irq, struct irte *entry) raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + if (unlikely(!irq_iommu->iommu)) { + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + index = irq_iommu->irte_index + irq_iommu->sub_handle; *entry = *(irq_iommu->iommu->ir_table->base + index); @@ -369,29 +374,52 @@ static int set_hpet_sid(struct irte *irte, u8 id) return 0; } +struct set_msi_sid_data { + struct pci_dev *pdev; + u16 alias; +}; + +static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct set_msi_sid_data *data = opaque; + + data->pdev = pdev; + data->alias = alias; + + return 0; +} + static int set_msi_sid(struct irte *irte, struct pci_dev *dev) { - struct pci_dev *bridge; + struct set_msi_sid_data data; if (!irte || !dev) return -1; - /* PCIe device or Root Complex integrated PCI device */ - if (pci_is_pcie(dev) || !dev->bus->parent) { - set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, - (dev->bus->number << 8) | dev->devfn); - return 0; - } + pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); - bridge = pci_find_upstream_pcie_bridge(dev); - if (bridge) { - if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */ - set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, - (bridge->bus->number << 8) | dev->bus->number); - else /* this is a legacy PCI bridge */ - set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, - (bridge->bus->number << 8) | bridge->devfn); - } + /* + * DMA alias provides us with a PCI device and alias. The only case + * where the it will return an alias on a different bus than the + * device is the case of a PCIe-to-PCI bridge, where the alias is for + * the subordinate bus. In this case we can only verify the bus. + * + * If the alias device is on a different bus than our source device + * then we have a topology based alias, use it. + * + * Otherwise, the alias is for a device DMA quirk and we cannot + * assume that MSI uses the same requester ID. Therefore use the + * original device. + */ + if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + PCI_DEVID(PCI_BUS_NUM(data.alias), + dev->bus->number)); + else if (data.pdev->bus->number != dev->bus->number) + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); + else + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + PCI_DEVID(dev->bus->number, dev->devfn)); return 0; } diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c new file mode 100644 index 000000000000..39b2d9127dbf --- /dev/null +++ b/drivers/iommu/iommu-sysfs.c @@ -0,0 +1,134 @@ +/* + * IOMMU sysfs class support + * + * Copyright (C) 2014 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/slab.h> + +/* + * We provide a common class "devices" group which initially has no attributes. + * As devices are added to the IOMMU, we'll add links to the group. + */ +static struct attribute *devices_attr[] = { + NULL, +}; + +static const struct attribute_group iommu_devices_attr_group = { + .name = "devices", + .attrs = devices_attr, +}; + +static const struct attribute_group *iommu_dev_groups[] = { + &iommu_devices_attr_group, + NULL, +}; + +static void iommu_release_device(struct device *dev) +{ + kfree(dev); +} + +static struct class iommu_class = { + .name = "iommu", + .dev_release = iommu_release_device, + .dev_groups = iommu_dev_groups, +}; + +static int __init iommu_dev_init(void) +{ + return class_register(&iommu_class); +} +postcore_initcall(iommu_dev_init); + +/* + * Create an IOMMU device and return a pointer to it. IOMMU specific + * attributes can be provided as an attribute group, allowing a unique + * namespace per IOMMU type. + */ +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + struct device *dev; + va_list vargs; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + device_initialize(dev); + + dev->class = &iommu_class; + dev->parent = parent; + dev->groups = groups; + dev_set_drvdata(dev, drvdata); + + va_start(vargs, fmt); + ret = kobject_set_name_vargs(&dev->kobj, fmt, vargs); + va_end(vargs); + if (ret) + goto error; + + ret = device_add(dev); + if (ret) + goto error; + + return dev; + +error: + put_device(dev); + return ERR_PTR(ret); +} + +void iommu_device_destroy(struct device *dev) +{ + if (!dev || IS_ERR(dev)) + return; + + device_unregister(dev); +} + +/* + * IOMMU drivers can indicate a device is managed by a given IOMMU using + * this interface. A link to the device will be created in the "devices" + * directory of the IOMMU device in sysfs and an "iommu" link will be + * created under the linked device, pointing back at the IOMMU device. + */ +int iommu_device_link(struct device *dev, struct device *link) +{ + int ret; + + if (!dev || IS_ERR(dev)) + return -ENODEV; + + ret = sysfs_add_link_to_group(&dev->kobj, "devices", + &link->kobj, dev_name(link)); + if (ret) + return ret; + + ret = sysfs_create_link_nowarn(&link->kobj, &dev->kobj, "iommu"); + if (ret) + sysfs_remove_link_from_group(&dev->kobj, "devices", + dev_name(link)); + + return ret; +} + +void iommu_device_unlink(struct device *dev, struct device *link) +{ + if (!dev || IS_ERR(dev)) + return; + + sysfs_remove_link(&link->kobj, "iommu"); + sysfs_remove_link_from_group(&dev->kobj, "devices", dev_name(link)); +} diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e5555fcfe703..169836020208 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -29,12 +29,17 @@ #include <linux/idr.h> #include <linux/notifier.h> #include <linux/err.h> +#include <linux/pci.h> #include <trace/events/iommu.h> static struct kset *iommu_group_kset; static struct ida iommu_group_ida; static struct mutex iommu_group_mutex; +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -514,9 +519,191 @@ int iommu_group_id(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_id); +/* + * To consider a PCI device isolated, we require ACS to support Source + * Validation, Request Redirection, Completer Redirection, and Upstream + * Forwarding. This effectively means that devices cannot spoof their + * requester ID, requests and completions cannot be redirected, and all + * transactions are forwarded upstream, even as it passes through a + * bridge where the target device is downstream. + */ +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +struct group_for_pci_data { + struct pci_dev *pdev; + struct iommu_group *group; +}; + +/* + * DMA alias iterator callback, return the last seen device. Stop and return + * the IOMMU group if we find one along the way. + */ +static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct group_for_pci_data *data = opaque; + + data->pdev = pdev; + data->group = iommu_group_get(&pdev->dev); + + return data->group != NULL; +} + +/* + * Use standard PCI bus topology, isolation features, and DMA alias quirks + * to find or create an IOMMU group for a device. + */ +static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +{ + struct group_for_pci_data data; + struct pci_bus *bus; + struct iommu_group *group = NULL; + struct pci_dev *tmp; + + /* + * Find the upstream DMA alias for the device. A device must not + * be aliased due to topology in order to have its own IOMMU group. + * If we find an alias along the way that already belongs to a + * group, use it. + */ + if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) + return data.group; + + pdev = data.pdev; + + /* + * Continue upstream from the point of minimum IOMMU granularity + * due to aliases to the point where devices are protected from + * peer-to-peer DMA by PCI ACS. Again, if we find an existing + * group, use it. + */ + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { + if (!bus->self) + continue; + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + pdev = bus->self; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + } + + /* + * Next we need to consider DMA alias quirks. If one device aliases + * to another, they should be grouped together. It's theoretically + * possible that aliases could create chains of devices where each + * device aliases another device. If we then factor in multifunction + * ACS grouping requirements, each alias could incorporate a new slot + * with multiple functions, each with aliases. This is all extremely + * unlikely as DMA alias quirks are typically only used for PCIe + * devices where we usually have a single slot per bus. Furthermore, + * the alias quirk is usually to another function within the slot + * (and ACS multifunction is not supported) or to a different slot + * that doesn't physically exist. The likely scenario is therefore + * that everything on the bus gets grouped together. To reduce the + * problem space, share the IOMMU group for all devices on the bus + * if a DMA alias quirk is present on the bus. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp->bus != pdev->bus || + !(tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) + continue; + + pci_dev_put(tmp); + tmp = NULL; + + /* We have an alias quirk, search for an existing group */ + for_each_pci_dev(tmp) { + struct iommu_group *group_tmp; + + if (tmp->bus != pdev->bus) + continue; + + group_tmp = iommu_group_get(&tmp->dev); + if (!group) { + group = group_tmp; + continue; + } + + if (group_tmp) { + WARN_ON(group != group_tmp); + iommu_group_put(group_tmp); + } + } + + return group ? group : iommu_group_alloc(); + } + + /* + * Non-multifunction devices or multifunction devices supporting + * ACS get their own group. + */ + if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) + return iommu_group_alloc(); + + /* + * Multifunction devices not supporting ACS share a group with other + * similar devices in the same slot. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus || + PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || + pci_acs_enabled(tmp, REQ_ACS_FLAGS)) + continue; + + group = iommu_group_get(&tmp->dev); + if (group) { + pci_dev_put(tmp); + return group; + } + } + + /* No shared group found, allocate new */ + return iommu_group_alloc(); +} + +/** + * iommu_group_get_for_dev - Find or create the IOMMU group for a device + * @dev: target device + * + * This function is intended to be called by IOMMU drivers and extended to + * support common, bus-defined algorithms when determining or creating the + * IOMMU group for a device. On success, the caller will hold a reference + * to the returned IOMMU group, which will already include the provided + * device. The reference should be released with iommu_group_put(). + */ +struct iommu_group *iommu_group_get_for_dev(struct device *dev) +{ + struct iommu_group *group = ERR_PTR(-EIO); + int ret; + + group = iommu_group_get(dev); + if (group) + return group; + + if (dev_is_pci(dev)) + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + + if (IS_ERR(group)) + return group; + + ret = iommu_group_add_device(group, dev); + if (ret) { + iommu_group_put(group); + return ERR_PTR(ret); + } + + return group; +} + static int add_iommu_group(struct device *dev, void *data) { - struct iommu_ops *ops = data; + struct iommu_callback_data *cb = data; + const struct iommu_ops *ops = cb->ops; if (!ops->add_device) return -ENODEV; @@ -532,7 +719,7 @@ static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; unsigned long group_action = 0; @@ -585,10 +772,14 @@ static struct notifier_block iommu_bus_nb = { .notifier_call = iommu_bus_notifier, }; -static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) +static void iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { + struct iommu_callback_data cb = { + .ops = ops, + }; + bus_register_notifier(bus, &iommu_bus_nb); - bus_for_each_dev(bus, NULL, ops, add_iommu_group); + bus_for_each_dev(bus, NULL, &cb, add_iommu_group); } /** @@ -604,7 +795,7 @@ static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) * is set up. With this function the iommu-driver can set the iommu-ops * afterwards. */ -int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops) +int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { if (bus->iommu_ops != NULL) return -EBUSY; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 53cde086e83b..7dab5cbcc775 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1120,7 +1120,7 @@ static void ipmmu_remove_device(struct device *dev) dev->archdata.iommu = NULL; } -static struct iommu_ops ipmmu_ops = { +static const struct iommu_ops ipmmu_ops = { .domain_init = ipmmu_domain_init, .domain_destroy = ipmmu_domain_destroy, .attach_dev = ipmmu_attach_device, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f5ff657f49fa..49f41d6e02f1 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -674,7 +674,7 @@ fail: return 0; } -static struct iommu_ops msm_iommu_ops = { +static const struct iommu_ops msm_iommu_ops = { .domain_init = msm_iommu_domain_init, .domain_destroy = msm_iommu_domain_destroy, .attach_dev = msm_iommu_attach_dev, diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 80fffba7f12d..531658d17333 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -213,116 +213,6 @@ static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf, return bytes; } -static ssize_t debug_read_mmap(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct omap_iommu *obj = dev_to_omap_iommu(dev); - char *p, *buf; - struct iovm_struct *tmp; - int uninitialized_var(i); - ssize_t bytes; - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n", - "No", "start", "end", "size", "flags"); - p += sprintf(p, "-------------------------------------------------\n"); - - mutex_lock(&iommu_debug_lock); - - list_for_each_entry(tmp, &obj->mmap, list) { - size_t len; - const char *str = "%3d %08x-%08x %6x %8x\n"; - const int maxcol = 39; - - len = tmp->da_end - tmp->da_start; - p += snprintf(p, maxcol, str, - i, tmp->da_start, tmp->da_end, len, tmp->flags); - - if (PAGE_SIZE - (p - buf) < maxcol) - break; - i++; - } - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_read_mem(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - char *p, *buf; - struct iovm_struct *area; - ssize_t bytes; - - count = min_t(ssize_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - bytes = -EINVAL; - goto err_out; - } - memcpy(p, area->va, count); - p += count; - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_write_mem(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct iovm_struct *area; - char *p, *buf; - - count = min_t(size_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - if (copy_from_user(p, userbuf, count)) { - count = -EFAULT; - goto err_out; - } - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - count = -EINVAL; - goto err_out; - } - memcpy(area->va, p, count); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return count; -} - #define DEBUG_FOPS(name) \ static const struct file_operations debug_##name##_fops = { \ .open = simple_open, \ @@ -342,8 +232,6 @@ DEBUG_FOPS_RO(ver); DEBUG_FOPS_RO(regs); DEBUG_FOPS_RO(tlb); DEBUG_FOPS(pagetable); -DEBUG_FOPS_RO(mmap); -DEBUG_FOPS(mem); #define __DEBUG_ADD_FILE(attr, mode) \ { \ @@ -389,8 +277,6 @@ static int iommu_debug_register(struct device *dev, void *data) DEBUG_ADD_FILE_RO(regs); DEBUG_ADD_FILE_RO(tlb); DEBUG_ADD_FILE(pagetable); - DEBUG_ADD_FILE_RO(mmap); - DEBUG_ADD_FILE(mem); return 0; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 895af06a667f..e202b0c24120 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -959,31 +959,18 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) return -EINVAL; - /* - * da_start and da_end are needed for omap-iovmm, so hardcode - * these values as used by OMAP3 ISP - the only user for - * omap-iovmm - */ - obj->da_start = 0; - obj->da_end = 0xfffff000; if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; } else { obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; - obj->da_start = pdata->da_start; - obj->da_end = pdata->da_end; } - if (obj->da_end <= obj->da_start) - return -EINVAL; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); spin_lock_init(&obj->iommu_lock); - mutex_init(&obj->mmap_lock); spin_lock_init(&obj->page_table_lock); - INIT_LIST_HEAD(&obj->mmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); obj->regbase = devm_ioremap_resource(obj->dev, res); @@ -1291,7 +1278,7 @@ static void omap_iommu_remove_device(struct device *dev) kfree(arch_data); } -static struct iommu_ops omap_iommu_ops = { +static const struct iommu_ops omap_iommu_ops = { .domain_init = omap_iommu_domain_init, .domain_destroy = omap_iommu_domain_destroy, .attach_dev = omap_iommu_attach_dev, diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index ea920c3e94ff..1275a822934b 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -46,12 +46,7 @@ struct omap_iommu { int nr_tlb_entries; - struct list_head mmap; - struct mutex mmap_lock; /* protect mmap */ - void *ctx; /* iommu context: registres saved area */ - u32 da_start; - u32 da_end; int has_bus_err_back; }; @@ -154,9 +149,12 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) #define MMU_RAM_PADDR_MASK \ ((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT) +#define MMU_RAM_ENDIAN_SHIFT 9 #define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) #define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ELSZ_SHIFT 7 #define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT) diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c deleted file mode 100644 index d14725984153..000000000000 --- a/drivers/iommu/omap-iovmm.c +++ /dev/null @@ -1,791 +0,0 @@ -/* - * omap iommu: simple virtual address space management - * - * Copyright (C) 2008-2009 Nokia Corporation - * - * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/device.h> -#include <linux/scatterlist.h> -#include <linux/iommu.h> -#include <linux/omap-iommu.h> -#include <linux/platform_data/iommu-omap.h> - -#include <asm/cacheflush.h> -#include <asm/mach/map.h> - -#include "omap-iopgtable.h" -#include "omap-iommu.h" - -/* - * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma) - * - * lower 16 bit is used for h/w and upper 16 bit is for s/w. - */ -#define IOVMF_SW_SHIFT 16 - -/* - * iovma: h/w flags derived from cam and ram attribute - */ -#define IOVMF_CAM_MASK (~((1 << 10) - 1)) -#define IOVMF_RAM_MASK (~IOVMF_CAM_MASK) - -#define IOVMF_PGSZ_MASK (3 << 0) -#define IOVMF_PGSZ_1M MMU_CAM_PGSZ_1M -#define IOVMF_PGSZ_64K MMU_CAM_PGSZ_64K -#define IOVMF_PGSZ_4K MMU_CAM_PGSZ_4K -#define IOVMF_PGSZ_16M MMU_CAM_PGSZ_16M - -#define IOVMF_ENDIAN_MASK (1 << 9) -#define IOVMF_ENDIAN_BIG MMU_RAM_ENDIAN_BIG - -#define IOVMF_ELSZ_MASK (3 << 7) -#define IOVMF_ELSZ_16 MMU_RAM_ELSZ_16 -#define IOVMF_ELSZ_32 MMU_RAM_ELSZ_32 -#define IOVMF_ELSZ_NONE MMU_RAM_ELSZ_NONE - -#define IOVMF_MIXED_MASK (1 << 6) -#define IOVMF_MIXED MMU_RAM_MIXED - -/* - * iovma: s/w flags, used for mapping and umapping internally. - */ -#define IOVMF_MMIO (1 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC (2 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC_MASK (3 << IOVMF_SW_SHIFT) - -/* "superpages" is supported just with physically linear pages */ -#define IOVMF_DISCONT (1 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR (2 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR_MASK (3 << (2 + IOVMF_SW_SHIFT)) - -#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) - -static struct kmem_cache *iovm_area_cachep; - -/* return the offset of the first scatterlist entry in a sg table */ -static unsigned int sgtable_offset(const struct sg_table *sgt) -{ - if (!sgt || !sgt->nents) - return 0; - - return sgt->sgl->offset; -} - -/* return total bytes of sg buffers */ -static size_t sgtable_len(const struct sg_table *sgt) -{ - unsigned int i, total = 0; - struct scatterlist *sg; - - if (!sgt) - return 0; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - if (!iopgsz_ok(bytes)) { - pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n", - __func__, i, bytes, sg->offset); - return 0; - } - - if (i && sg->offset) { - pr_err("%s: sg[%d] offset not allowed in internal entries\n", - __func__, i); - return 0; - } - - total += bytes; - } - - return total; -} -#define sgtable_ok(x) (!!sgtable_len(x)) - -static unsigned max_alignment(u32 addr) -{ - int i; - unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, }; - for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++) - ; - return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0; -} - -/* - * calculate the optimal number sg elements from total bytes based on - * iommu superpages - */ -static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa) -{ - unsigned nr_entries = 0, ent_sz; - - if (!IS_ALIGNED(bytes, PAGE_SIZE)) { - pr_err("%s: wrong size %08x\n", __func__, bytes); - return 0; - } - - while (bytes) { - ent_sz = max_alignment(da | pa); - ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes)); - nr_entries++; - da += ent_sz; - pa += ent_sz; - bytes -= ent_sz; - } - - return nr_entries; -} - -/* allocate and initialize sg_table header(a kind of 'superblock') */ -static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags, - u32 da, u32 pa) -{ - unsigned int nr_entries; - int err; - struct sg_table *sgt; - - if (!bytes) - return ERR_PTR(-EINVAL); - - if (!IS_ALIGNED(bytes, PAGE_SIZE)) - return ERR_PTR(-EINVAL); - - if (flags & IOVMF_LINEAR) { - nr_entries = sgtable_nents(bytes, da, pa); - if (!nr_entries) - return ERR_PTR(-EINVAL); - } else - nr_entries = bytes / PAGE_SIZE; - - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL); - if (err) { - kfree(sgt); - return ERR_PTR(err); - } - - pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries); - - return sgt; -} - -/* free sg_table header(a kind of superblock) */ -static void sgtable_free(struct sg_table *sgt) -{ - if (!sgt) - return; - - sg_free_table(sgt); - kfree(sgt); - - pr_debug("%s: sgt:%p\n", __func__, sgt); -} - -/* map 'sglist' to a contiguous mpu virtual area and return 'va' */ -static void *vmap_sg(const struct sg_table *sgt) -{ - u32 va; - size_t total; - unsigned int i; - struct scatterlist *sg; - struct vm_struct *new; - const struct mem_type *mtype; - - mtype = get_mem_type(MT_DEVICE); - if (!mtype) - return ERR_PTR(-EINVAL); - - total = sgtable_len(sgt); - if (!total) - return ERR_PTR(-EINVAL); - - new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END); - if (!new) - return ERR_PTR(-ENOMEM); - va = (u32)new->addr; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - u32 pa; - int err; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - BUG_ON(bytes != PAGE_SIZE); - - err = ioremap_page(va, pa, mtype); - if (err) - goto err_out; - - va += bytes; - } - - flush_cache_vmap((unsigned long)new->addr, - (unsigned long)(new->addr + total)); - return new->addr; - -err_out: - WARN_ON(1); /* FIXME: cleanup some mpu mappings */ - vunmap(new->addr); - return ERR_PTR(-EAGAIN); -} - -static inline void vunmap_sg(const void *va) -{ - vunmap(va); -} - -static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj, - const u32 da) -{ - struct iovm_struct *tmp; - - list_for_each_entry(tmp, &obj->mmap, list) { - if ((da >= tmp->da_start) && (da < tmp->da_end)) { - size_t len; - - len = tmp->da_end - tmp->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", - __func__, tmp->da_start, da, tmp->da_end, len, - tmp->flags); - - return tmp; - } - } - - return NULL; -} - -/** - * omap_find_iovm_area - find iovma which includes @da - * @dev: client device - * @da: iommu device virtual address - * - * Find the existing iovma starting at @da - */ -struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - area = __find_iovm_area(obj, da); - mutex_unlock(&obj->mmap_lock); - - return area; -} -EXPORT_SYMBOL_GPL(omap_find_iovm_area); - -/* - * This finds the hole(area) which fits the requested address and len - * in iovmas mmap, and returns the new allocated iovma. - */ -static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da, - size_t bytes, u32 flags) -{ - struct iovm_struct *new, *tmp; - u32 start, prev_end, alignment; - - if (!obj || !bytes) - return ERR_PTR(-EINVAL); - - start = da; - alignment = PAGE_SIZE; - - if (~flags & IOVMF_DA_FIXED) { - /* Don't map address 0 */ - start = obj->da_start ? obj->da_start : alignment; - - if (flags & IOVMF_LINEAR) - alignment = iopgsz_max(bytes); - start = roundup(start, alignment); - } else if (start < obj->da_start || start > obj->da_end || - obj->da_end - start < bytes) { - return ERR_PTR(-EINVAL); - } - - tmp = NULL; - if (list_empty(&obj->mmap)) - goto found; - - prev_end = 0; - list_for_each_entry(tmp, &obj->mmap, list) { - - if (prev_end > start) - break; - - if (tmp->da_start > start && (tmp->da_start - start) >= bytes) - goto found; - - if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED) - start = roundup(tmp->da_end + 1, alignment); - - prev_end = tmp->da_end; - } - - if ((start >= prev_end) && (obj->da_end - start >= bytes)) - goto found; - - dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n", - __func__, da, bytes, flags); - - return ERR_PTR(-EINVAL); - -found: - new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL); - if (!new) - return ERR_PTR(-ENOMEM); - - new->iommu = obj; - new->da_start = start; - new->da_end = start + bytes; - new->flags = flags; - - /* - * keep ascending order of iovmas - */ - if (tmp) - list_add_tail(&new->list, &tmp->list); - else - list_add(&new->list, &obj->mmap); - - dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n", - __func__, new->da_start, start, new->da_end, bytes, flags); - - return new; -} - -static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area) -{ - size_t bytes; - - BUG_ON(!obj || !area); - - bytes = area->da_end - area->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n", - __func__, area->da_start, area->da_end, bytes, area->flags); - - list_del(&area->list); - kmem_cache_free(iovm_area_cachep, area); -} - -/** - * omap_da_to_va - convert (d) to (v) - * @dev: client device - * @da: iommu device virtual address - * @va: mpu virtual address - * - * Returns mpu virtual addr which corresponds to a given device virtual addr - */ -void *omap_da_to_va(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va = NULL; - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - va = area->va; -out: - mutex_unlock(&obj->mmap_lock); - - return va; -} -EXPORT_SYMBOL_GPL(omap_da_to_va); - -static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va) -{ - unsigned int i; - struct scatterlist *sg; - void *va = _va; - void *va_end; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - struct page *pg; - const size_t bytes = PAGE_SIZE; - - /* - * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()' - */ - pg = vmalloc_to_page(va); - BUG_ON(!pg); - sg_set_page(sg, pg, bytes, 0); - - va += bytes; - } - - va_end = _va + PAGE_SIZE * i; -} - -static inline void sgtable_drain_vmalloc(struct sg_table *sgt) -{ - /* - * Actually this is not necessary at all, just exists for - * consistency of the code readability. - */ - BUG_ON(!sgt); -} - -/* create 'da' <-> 'pa' mapping from 'sgt' */ -static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, - const struct sg_table *sgt, u32 flags) -{ - int err; - unsigned int i, j; - struct scatterlist *sg; - u32 da = new->da_start; - - if (!domain || !sgt) - return -EINVAL; - - BUG_ON(!sgtable_ok(sgt)); - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - u32 pa; - size_t bytes; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - flags &= ~IOVMF_PGSZ_MASK; - - if (bytes_to_iopgsz(bytes) < 0) - goto err_out; - - pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, - i, da, pa, bytes); - - err = iommu_map(domain, da, pa, bytes, flags); - if (err) - goto err_out; - - da += bytes; - } - return 0; - -err_out: - da = new->da_start; - - for_each_sg(sgt->sgl, sg, i, j) { - size_t bytes; - - bytes = sg->length + sg->offset; - - /* ignore failures.. we're already handling one */ - iommu_unmap(domain, da, bytes); - - da += bytes; - } - return err; -} - -/* release 'da' <-> 'pa' mapping */ -static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, - struct iovm_struct *area) -{ - u32 start; - size_t total = area->da_end - area->da_start; - const struct sg_table *sgt = area->sgt; - struct scatterlist *sg; - int i; - size_t unmapped; - - BUG_ON(!sgtable_ok(sgt)); - BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); - - start = area->da_start; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - unmapped = iommu_unmap(domain, start, bytes); - if (unmapped < bytes) - break; - - dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", - __func__, start, bytes, area->flags); - - BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE)); - - total -= bytes; - start += bytes; - } - BUG_ON(total); -} - -/* template function for all unmapping */ -static struct sg_table *unmap_vm_area(struct iommu_domain *domain, - struct omap_iommu *obj, const u32 da, - void (*fn)(const void *), u32 flags) -{ - struct sg_table *sgt = NULL; - struct iovm_struct *area; - - if (!IS_ALIGNED(da, PAGE_SIZE)) { - dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da); - return NULL; - } - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - - if ((area->flags & flags) != flags) { - dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__, - area->flags); - goto out; - } - sgt = (struct sg_table *)area->sgt; - - unmap_iovm_area(domain, obj, area); - - fn(area->va); - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__, - area->da_start, da, area->da_end, - area->da_end - area->da_start, area->flags); - - free_iovm_area(obj, area); -out: - mutex_unlock(&obj->mmap_lock); - - return sgt; -} - -static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, void *va, - size_t bytes, u32 flags) -{ - int err = -ENOMEM; - struct iovm_struct *new; - - mutex_lock(&obj->mmap_lock); - - new = alloc_iovm_area(obj, da, bytes, flags); - if (IS_ERR(new)) { - err = PTR_ERR(new); - goto err_alloc_iovma; - } - new->va = va; - new->sgt = sgt; - - if (map_iovm_area(domain, new, sgt, new->flags)) - goto err_map; - - mutex_unlock(&obj->mmap_lock); - - dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n", - __func__, new->da_start, bytes, new->flags, va); - - return new->da_start; - -err_map: - free_iovm_area(obj, new); -err_alloc_iovma: - mutex_unlock(&obj->mmap_lock); - return err; -} - -static inline u32 -__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, - void *va, size_t bytes, u32 flags) -{ - return map_iommu_region(domain, obj, da, sgt, va, bytes, flags); -} - -/** - * omap_iommu_vmap - (d)-(p)-(v) address mapper - * @domain: iommu domain - * @dev: client device - * @sgt: address of scatter gather table - * @flags: iovma and page property - * - * Creates 1-n-1 mapping with given @sgt and returns @da. - * All @sgt element must be io page size aligned. - */ -u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, - const struct sg_table *sgt, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - size_t bytes; - void *va = NULL; - - if (!obj || !obj->dev || !sgt) - return -EINVAL; - - bytes = sgtable_len(sgt); - if (!bytes) - return -EINVAL; - bytes = PAGE_ALIGN(bytes); - - if (flags & IOVMF_MMIO) { - va = vmap_sg(sgt); - if (IS_ERR(va)) - return PTR_ERR(va); - } - - flags |= IOVMF_DISCONT; - flags |= IOVMF_MMIO; - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - vunmap_sg(va); - - return da + sgtable_offset(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vmap); - -/** - * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' - * @domain: iommu domain - * @dev: client device - * @da: iommu device virtual address - * - * Free the iommu virtually contiguous memory area starting at - * @da, which was returned by 'omap_iommu_vmap()'. - */ -struct sg_table * -omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - /* - * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. - * Just returns 'sgt' to the caller to free - */ - da &= PAGE_MASK; - sgt = unmap_vm_area(domain, obj, da, vunmap_sg, - IOVMF_DISCONT | IOVMF_MMIO); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - return sgt; -} -EXPORT_SYMBOL_GPL(omap_iommu_vunmap); - -/** - * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper - * @dev: client device - * @da: contiguous iommu virtual memory - * @bytes: allocation size - * @flags: iovma and page property - * - * Allocate @bytes linearly and creates 1-n-1 mapping and returns - * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. - */ -u32 -omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da, - size_t bytes, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va; - struct sg_table *sgt; - - if (!obj || !obj->dev || !bytes) - return -EINVAL; - - bytes = PAGE_ALIGN(bytes); - - va = vmalloc(bytes); - if (!va) - return -ENOMEM; - - flags |= IOVMF_DISCONT; - flags |= IOVMF_ALLOC; - - sgt = sgtable_alloc(bytes, flags, da, 0); - if (IS_ERR(sgt)) { - da = PTR_ERR(sgt); - goto err_sgt_alloc; - } - sgtable_fill_vmalloc(sgt, va); - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - goto err_iommu_vmap; - - return da; - -err_iommu_vmap: - sgtable_drain_vmalloc(sgt); - sgtable_free(sgt); -err_sgt_alloc: - vfree(va); - return da; -} -EXPORT_SYMBOL_GPL(omap_iommu_vmalloc); - -/** - * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' - * @dev: client device - * @da: iommu device virtual address - * - * Frees the iommu virtually continuous memory area starting at - * @da, as obtained from 'omap_iommu_vmalloc()'. - */ -void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, - const u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - - sgt = unmap_vm_area(domain, obj, da, vfree, - IOVMF_DISCONT | IOVMF_ALLOC); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - sgtable_free(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vfree); - -static int __init iovmm_init(void) -{ - const unsigned long flags = SLAB_HWCACHE_ALIGN; - struct kmem_cache *p; - - p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0, - flags, NULL); - if (!p) - return -ENOMEM; - iovm_area_cachep = p; - - return 0; -} -module_init(iovmm_init); - -static void __exit iovmm_exit(void) -{ - kmem_cache_destroy(iovm_area_cachep); -} -module_exit(iovmm_exit); - -MODULE_DESCRIPTION("omap iommu: simple virtual address space management"); -MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h deleted file mode 100644 index 352d80ae7443..000000000000 --- a/drivers/iommu/pci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) 2013 Red Hat, Inc. - * Copyright (C) 2013 Freescale Semiconductor, Inc. - * - */ -#ifndef __IOMMU_PCI_H -#define __IOMMU_PCI_H - -/* Helper function for swapping pci device reference */ -static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) -{ - pci_dev_put(*from); - *from = to; -} - -#endif /* __IOMMU_PCI_H */ diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index 464acda0bbc4..1333e6fb3405 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -354,7 +354,7 @@ static int shmobile_iommu_add_device(struct device *dev) return 0; } -static struct iommu_ops shmobile_iommu_ops = { +static const struct iommu_ops shmobile_iommu_ops = { .domain_init = shmobile_iommu_domain_init, .domain_destroy = shmobile_iommu_domain_destroy, .attach_dev = shmobile_iommu_attach_device, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index dba1a9fd5070..b10a8ecede8e 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -309,7 +309,7 @@ static int gart_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops gart_iommu_ops = { +static const struct iommu_ops gart_iommu_ops = { .domain_init = gart_iommu_domain_init, .domain_destroy = gart_iommu_domain_destroy, .attach_dev = gart_iommu_attach_dev, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 605b5b46a903..792da5ea6d12 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -947,7 +947,7 @@ static void smmu_iommu_domain_destroy(struct iommu_domain *domain) dev_dbg(smmu->dev, "smmu_as@%p\n", as); } -static struct iommu_ops smmu_iommu_ops = { +static const struct iommu_ops smmu_iommu_ops = { .domain_init = smmu_iommu_domain_init, .domain_destroy = smmu_iommu_domain_destroy, .attach_dev = smmu_iommu_attach_dev, |