diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-10 01:15:47 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-10 01:15:47 +0300 |
commit | 28b47809b2171a6cfbab839936b24280639c9f85 (patch) | |
tree | 23c918f66783e269e95680136f80362d9c62070d | |
parent | 4a1e31c68e9f40be32838944931178b0d9ed9162 (diff) | |
parent | 2c0248d68880fc0e783af1048b3367ee5d4412f0 (diff) | |
download | linux-28b47809b2171a6cfbab839936b24280639c9f85.tar.xz |
Merge tag 'iommu-updates-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
- code optimizations for the Intel VT-d driver
- ability to switch off a previously enabled Intel IOMMU
- support for 'struct iommu_device' for OMAP, Rockchip and Mediatek
IOMMUs
- header optimizations for IOMMU core code headers and a few fixes that
became necessary in other parts of the kernel because of that
- ACPI/IORT updates and fixes
- Exynos IOMMU optimizations
- updates for the IOMMU dma-api code to bring it closer to use per-cpu
iova caches
- new command-line option to set default domain type allocated by the
iommu core code
- another command line option to allow the Intel IOMMU switched off in
a tboot environment
- ARM/SMMU: TLB sync optimisations for SMMUv2, Support for using an
IDENTITY domain in conjunction with DMA ops, Support for SMR masking,
Support for 16-bit ASIDs (was previously broken)
- various other small fixes and improvements
* tag 'iommu-updates-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (63 commits)
soc/qbman: Move dma-mapping.h include to qman_priv.h
soc/qbman: Fix implicit header dependency now causing build fails
iommu: Remove trace-events include from iommu.h
iommu: Remove pci.h include from trace/events/iommu.h
arm: dma-mapping: Don't override dma_ops in arch_setup_dma_ops()
ACPI/IORT: Fix CONFIG_IOMMU_API dependency
iommu/vt-d: Don't print the failure message when booting non-kdump kernel
iommu: Move report_iommu_fault() to iommu.c
iommu: Include device.h in iommu.h
x86, iommu/vt-d: Add an option to disable Intel IOMMU force on
iommu/arm-smmu: Return IOVA in iova_to_phys when SMMU is bypassed
iommu/arm-smmu: Correct sid to mask
iommu/amd: Fix incorrect error handling in amd_iommu_bind_pasid()
iommu: Make iommu_bus_notifier return NOTIFY_DONE rather than error code
omap3isp: Remove iommu_group related code
iommu/omap: Add iommu-group support
iommu/omap: Make use of 'struct iommu_device'
iommu/omap: Store iommu_dev pointer in arch_data
iommu/omap: Move data structures to omap-iommu.h
iommu/omap: Drop legacy-style device support
...
48 files changed, 1188 insertions, 765 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e4c9e0e46b95..130e7ecaf9a6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1578,6 +1578,15 @@ extended tables themselves, and also PASID support. With this option set, extended tables will not be used even on hardware which claims to support them. + tboot_noforce [Default Off] + Do not force the Intel IOMMU enabled under tboot. + By default, tboot will force Intel IOMMU on, which + could harm performance of some high-throughput + devices like 40GBit network cards, even if identity + mapping is enabled. + Note that using this option lowers the security + provided by tboot because it makes the system + vulnerable to DMA attacks. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. @@ -1644,6 +1653,12 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.passthrough= + [ARM64] Configure DMA to bypass the IOMMU by default. + Format: { "0" | "1" } + 0 - Use IOMMU translation for DMA. + 1 - Bypass the IOMMU for DMA. + unset - Use IOMMU translation for DMA. io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 6cdf32d037fc..8a6ffce12af5 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -60,6 +60,17 @@ conditions. aliases of secure registers have to be used during SMMU configuration. +- stream-match-mask : For SMMUs supporting stream matching and using + #iommu-cells = <1>, specifies a mask of bits to ignore + when matching stream IDs (e.g. this may be programmed + into the SMRn.MASK field of every stream match register + used). For cases where it is desirable to ignore some + portion of every Stream ID (e.g. for certain MMU-500 + configurations given globally unique input IDs). This + property is not valid for SMMUs using stream indexing, + or using stream matching with #iommu-cells = <2>, and + may be ignored if present in such cases. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -109,3 +120,20 @@ conditions. master3 { iommus = <&smmu2 1 0x30>; }; + + + /* ARM MMU-500 with 10-bit stream ID input configuration */ + smmu3: iommu { + compatible = "arm,mmu-500", "arm,smmu-v2"; + ... + #iommu-cells = <1>; + /* always ignore appended 5-bit TBU number */ + stream-match-mask = 0x7c00; + }; + + bus { + /* bus whose child devices emit one unique 10-bit stream + ID each, but may master through multiple SMMU TBUs */ + iommu-map = <0 &smmu3 0 0x400>; + ... + }; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0268584f1fa0..c742dfd2967b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2408,6 +2408,15 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct dma_map_ops *dma_ops; dev->archdata.dma_coherent = coherent; + + /* + * Don't override the dma_ops if they have already been set. Ideally + * this should be the only location where dma_ops are set, remove this + * check when all other callers of set_dma_ops will have disappeared. + */ + if (dev->dma_ops) + return; + if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) dma_ops = arm_get_iommu_dma_map_ops(coherent); else diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4dac4afc95a5..3216e098c058 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -28,6 +28,7 @@ #include <linux/dma-contiguous.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> +#include <linux/pci.h> #include <asm/cacheflush.h> @@ -879,34 +880,26 @@ static const struct dma_map_ops iommu_dma_ops = { .mapping_error = iommu_dma_mapping_error, }; -/* - * TODO: Right now __iommu_setup_dma_ops() gets called too early to do - * everything it needs to - the device is only partially created and the - * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we - * need this delayed attachment dance. Once IOMMU probe ordering is sorted - * to move the arch_setup_dma_ops() call later, all the notifier bits below - * become unnecessary, and will go away. - */ -struct iommu_dma_notifier_data { - struct list_head list; - struct device *dev; - const struct iommu_ops *ops; - u64 dma_base; - u64 size; -}; -static LIST_HEAD(iommu_dma_masters); -static DEFINE_MUTEX(iommu_dma_notifier_lock); +static int __init __iommu_dma_init(void) +{ + return iommu_dma_init(); +} +arch_initcall(__iommu_dma_init); -static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, - u64 dma_base, u64 size) +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *ops) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_domain *domain; + + if (!ops) + return; /* - * If the IOMMU driver has the DMA domain support that we require, - * then the IOMMU core will have already configured a group for this - * device, and allocated the default domain for that group. + * The IOMMU core code allocates the default DMA domain, which the + * underlying IOMMU driver needs to support via the dma-iommu layer. */ + domain = iommu_get_domain_for_dev(dev); + if (!domain) goto out_err; @@ -917,109 +910,11 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, dev->dma_ops = &iommu_dma_ops; } - return true; + return; + out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); - return false; -} - -static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, - u64 dma_base, u64 size) -{ - struct iommu_dma_notifier_data *iommudata; - - iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); - if (!iommudata) - return; - - iommudata->dev = dev; - iommudata->ops = ops; - iommudata->dma_base = dma_base; - iommudata->size = size; - - mutex_lock(&iommu_dma_notifier_lock); - list_add(&iommudata->list, &iommu_dma_masters); - mutex_unlock(&iommu_dma_notifier_lock); -} - -static int __iommu_attach_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct iommu_dma_notifier_data *master, *tmp; - - if (action != BUS_NOTIFY_BIND_DRIVER) - return 0; - - mutex_lock(&iommu_dma_notifier_lock); - list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if (data == master->dev && do_iommu_attach(master->dev, - master->ops, master->dma_base, master->size)) { - list_del(&master->list); - kfree(master); - break; - } - } - mutex_unlock(&iommu_dma_notifier_lock); - return 0; -} - -static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) -{ - struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); - int ret; - - if (!nb) - return -ENOMEM; - - nb->notifier_call = __iommu_attach_notifier; - - ret = bus_register_notifier(bus, nb); - if (ret) { - pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", - bus->name); - kfree(nb); - } - return ret; -} - -static int __init __iommu_dma_init(void) -{ - int ret; - - ret = iommu_dma_init(); - if (!ret) - ret = register_iommu_dma_ops_notifier(&platform_bus_type); - if (!ret) - ret = register_iommu_dma_ops_notifier(&amba_bustype); -#ifdef CONFIG_PCI - if (!ret) - ret = register_iommu_dma_ops_notifier(&pci_bus_type); -#endif - return ret; -} -arch_initcall(__iommu_dma_init); - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *ops) -{ - struct iommu_group *group; - - if (!ops) - return; - /* - * TODO: As a concession to the future, we're ready to handle being - * called both early and late (i.e. after bus_add_device). Once all - * the platform bus code is reworked to call us late and the notifier - * junk above goes away, move the body of do_iommu_attach here. - */ - group = iommu_group_get(dev); - if (group) { - do_iommu_attach(dev, ops, dma_base, size); - iommu_group_put(group); - } else { - queue_iommu_attach(dev, ops, dma_base, size); - } } void arch_teardown_dma_ops(struct device *dev) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index d4c8011a2293..4b1724059909 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,6 +514,9 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; + if (!intel_iommu_tboot_noforce) + return 1; + if (no_iommu || swiotlb || dmar_disabled) pr_warning("Forcing Intel-IOMMU to enabled\n"); diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 22e08d272db7..c5fecf97ee2f 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -618,6 +618,46 @@ static int arm_smmu_iort_xlate(struct device *dev, u32 streamid, return ret; } +static inline bool iort_iommu_driver_enabled(u8 type) +{ + switch (type) { + case ACPI_IORT_NODE_SMMU_V3: + return IS_BUILTIN(CONFIG_ARM_SMMU_V3); + case ACPI_IORT_NODE_SMMU: + return IS_BUILTIN(CONFIG_ARM_SMMU); + default: + pr_warn("IORT node type %u does not describe an SMMU\n", type); + return false; + } +} + +#ifdef CONFIG_IOMMU_API +static inline +const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec) +{ + return (fwspec && fwspec->ops) ? fwspec->ops : NULL; +} + +static inline +int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) +{ + int err = 0; + + if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus && + !dev->iommu_group) + err = ops->add_device(dev); + + return err; +} +#else +static inline +const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec) +{ return NULL; } +static inline +int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) +{ return 0; } +#endif + static const struct iommu_ops *iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) @@ -626,14 +666,31 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev, int ret = -ENODEV; struct fwnode_handle *iort_fwnode; + /* + * If we already translated the fwspec there + * is nothing left to do, return the iommu_ops. + */ + ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); + if (ops) + return ops; + if (node) { iort_fwnode = iort_get_fwnode(node); if (!iort_fwnode) return NULL; ops = iommu_ops_from_fwnode(iort_fwnode); + /* + * If the ops look-up fails, this means that either + * the SMMU drivers have not been probed yet or that + * the SMMU drivers are not built in the kernel; + * Depending on whether the SMMU drivers are built-in + * in the kernel or not, defer the IOMMU configuration + * or just abort it. + */ if (!ops) - return NULL; + return iort_iommu_driver_enabled(node->type) ? + ERR_PTR(-EPROBE_DEFER) : NULL; ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); } @@ -676,6 +733,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) struct acpi_iort_node *node, *parent; const struct iommu_ops *ops = NULL; u32 streamid = 0; + int err; if (dev_is_pci(dev)) { struct pci_bus *bus = to_pci_dev(dev)->bus; @@ -707,6 +765,8 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) while (parent) { ops = iort_iommu_xlate(dev, parent, streamid); + if (IS_ERR_OR_NULL(ops)) + return ops; parent = iort_node_map_platform_id(node, &streamid, IORT_IOMMU_TYPE, @@ -714,6 +774,14 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) } } + /* + * If we have reason to believe the IOMMU driver missed the initial + * add_device callback for dev, replay it to get things in order. + */ + err = iort_add_device_replay(ops, dev); + if (err) + ops = ERR_PTR(err); + return ops; } @@ -1052,6 +1120,4 @@ void __init acpi_iort_init(void) } iort_init_platform_devices(); - - acpi_probe_device_table(iort); } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 3e7020751d34..3be1433853bf 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -179,7 +179,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; - enum dev_dma_attr attr; if (has_acpi_companion(dev)) { if (acpi_dev) { @@ -236,10 +235,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); - attr = acpi_get_dma_attr(acpi_dev); - if (attr != DEV_DMA_NOT_SUPPORTED) - acpi_dma_configure(dev, attr); - acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, physical_node_name); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index c26931067415..e39ec7b7cb67 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1363,20 +1363,25 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) * @dev: The pointer to the device * @attr: device dma attributes */ -void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) +int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { const struct iommu_ops *iommu; + u64 size; iort_set_dma_mask(dev); iommu = iort_iommu_configure(dev); + if (IS_ERR(iommu)) + return PTR_ERR(iommu); + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); /* * Assume dma valid range starts at 0 and covers the whole * coherent_dma_mask. */ - arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, iommu, - attr == DEV_DMA_COHERENT); + arch_setup_dma_ops(dev, 0, size, iommu, attr == DEV_DMA_COHERENT); + + return 0; } EXPORT_SYMBOL_GPL(acpi_dma_configure); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a1fbf55c4d3a..4882f06d12df 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -19,6 +19,7 @@ #include <linux/device.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/module.h> #include <linux/kthread.h> #include <linux/wait.h> @@ -356,6 +357,10 @@ re_probe: if (ret) goto pinctrl_bind_failed; + ret = dma_configure(dev); + if (ret) + goto dma_failed; + if (driver_sysfs_add(dev)) { printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", __func__, dev_name(dev)); @@ -417,6 +422,8 @@ re_probe: goto done; probe_failed: + dma_deconfigure(dev); +dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -826,6 +833,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); + dma_deconfigure(dev); + devres_release_all(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 51b7061ff7c0..f3deb6af42ad 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -7,9 +7,11 @@ * This file is released under the GPLv2. */ +#include <linux/acpi.h> #include <linux/dma-mapping.h> #include <linux/export.h> #include <linux/gfp.h> +#include <linux/of_device.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -340,3 +342,42 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) vunmap(cpu_addr); } #endif + +/* + * Common configuration to enable DMA API use for a device + */ +#include <linux/pci.h> + +int dma_configure(struct device *dev) +{ + struct device *bridge = NULL, *dma_dev = dev; + enum dev_dma_attr attr; + int ret = 0; + + if (dev_is_pci(dev)) { + bridge = pci_get_host_bridge_device(to_pci_dev(dev)); + dma_dev = bridge; + if (IS_ENABLED(CONFIG_OF) && dma_dev->parent && + dma_dev->parent->of_node) + dma_dev = dma_dev->parent; + } + + if (dma_dev->of_node) { + ret = of_dma_configure(dev, dma_dev->of_node); + } else if (has_acpi_companion(dma_dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode)); + if (attr != DEV_DMA_NOT_SUPPORTED) + ret = acpi_dma_configure(dev, attr); + } + + if (bridge) + pci_put_host_bridge_device(bridge); + + return ret; +} + +void dma_deconfigure(struct device *dev) +{ + of_dma_deconfigure(dev); + acpi_dma_deconfigure(dev); +} diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ef11e770f822..6a72095d6c7a 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <linux/netdevice.h> #include <linux/iommu.h> +#include <linux/pci.h> #include <net/addrconf.h> #include <linux/qed/qede_roce.h> #include <linux/qed/qed_chain.h> diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 063343909b0d..6629c472eafd 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -696,9 +696,9 @@ out_clear_state: out_unregister: mmu_notifier_unregister(&pasid_state->mn, mm); + mmput(mm); out_free: - mmput(mm); free_pasid_state(pasid_state); out: diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 591bb96047c9..380969aa60d5 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -554,9 +554,14 @@ struct arm_smmu_s2_cfg { }; struct arm_smmu_strtab_ent { - bool valid; - - bool bypass; /* Overrides s1/s2 config */ + /* + * An STE is "assigned" if the master emitting the corresponding SID + * is attached to a domain. The behaviour of an unassigned STE is + * determined by the disable_bypass parameter, whereas an assigned + * STE behaves according to s1_cfg/s2_cfg, which themselves are + * configured according to the domain type. + */ + bool assigned; struct arm_smmu_s1_cfg *s1_cfg; struct arm_smmu_s2_cfg *s2_cfg; }; @@ -632,6 +637,7 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, + ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { @@ -1005,9 +1011,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, * This is hideously complicated, but we only really care about * three cases at the moment: * - * 1. Invalid (all zero) -> bypass (init) - * 2. Bypass -> translation (attach) - * 3. Translation -> bypass (detach) + * 1. Invalid (all zero) -> bypass/fault (init) + * 2. Bypass/fault -> translation/bypass (attach) + * 3. Translation/bypass -> bypass/fault (detach) * * Given that we can't update the STE atomically and the SMMU * doesn't read the thing in a defined order, that leaves us @@ -1046,11 +1052,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } /* Nuke the existing STE_0 value, as we're going to rewrite it */ - val = ste->valid ? STRTAB_STE_0_V : 0; + val = STRTAB_STE_0_V; + + /* Bypass/fault */ + if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) { + if (!ste->assigned && disable_bypass) + val |= STRTAB_STE_0_CFG_ABORT; + else + val |= STRTAB_STE_0_CFG_BYPASS; - if (ste->bypass) { - val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT - : STRTAB_STE_0_CFG_BYPASS; dst[0] = cpu_to_le64(val); dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING << STRTAB_STE_1_SHCFG_SHIFT); @@ -1111,10 +1121,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) { unsigned int i; - struct arm_smmu_strtab_ent ste = { - .valid = true, - .bypass = true, - }; + struct arm_smmu_strtab_ent ste = { .assigned = false }; for (i = 0; i < nent; ++i) { arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste); @@ -1378,7 +1385,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; /* @@ -1509,6 +1518,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; + return 0; + } + /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) smmu_domain->stage = ARM_SMMU_DOMAIN_S2; @@ -1579,7 +1593,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) return step; } -static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) +static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { int i; struct arm_smmu_master_data *master = fwspec->iommu_priv; @@ -1591,17 +1605,14 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } - - return 0; } static void arm_smmu_detach_dev(struct device *dev) { struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; - master->ste.bypass = true; - if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0) - dev_warn(dev, "failed to install bypass STE\n"); + master->ste.assigned = false; + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1620,7 +1631,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste = &master->ste; /* Already attached to a different domain? */ - if (!ste->bypass) + if (ste->assigned) arm_smmu_detach_dev(dev); mutex_lock(&smmu_domain->init_mutex); @@ -1641,10 +1652,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } - ste->bypass = false; - ste->valid = true; + ste->assigned = true; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) { + ste->s1_cfg = NULL; + ste->s2_cfg = NULL; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { ste->s1_cfg = &smmu_domain->s1_cfg; ste->s2_cfg = NULL; arm_smmu_write_ctx_desc(smmu, ste->s1_cfg); @@ -1653,10 +1666,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste->s2_cfg = &smmu_domain->s2_cfg; } - ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec); - if (ret < 0) - ste->valid = false; - + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1704,6 +1714,9 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + if (domain->type == IOMMU_DOMAIN_IDENTITY) + return iova; + if (!ops) return 0; @@ -1807,7 +1820,7 @@ static void arm_smmu_remove_device(struct device *dev) master = fwspec->iommu_priv; smmu = master->smmu; - if (master && master->ste.valid) + if (master && master->ste.assigned) arm_smmu_detach_dev(dev); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); @@ -1837,6 +1850,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1852,6 +1868,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { @@ -1893,6 +1912,8 @@ static void arm_smmu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + + iommu_dma_get_resv_regions(dev, head); } static void arm_smmu_put_resv_regions(struct device *dev, @@ -2761,51 +2782,9 @@ static struct platform_driver arm_smmu_driver = { .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, }; +module_platform_driver(arm_smmu_driver); -static int __init arm_smmu_init(void) -{ - static bool registered; - int ret = 0; - - if (!registered) { - ret = platform_driver_register(&arm_smmu_driver); - registered = !ret; - } - return ret; -} - -static void __exit arm_smmu_exit(void) -{ - return platform_driver_unregister(&arm_smmu_driver); -} - -subsys_initcall(arm_smmu_init); -module_exit(arm_smmu_exit); - -static int __init arm_smmu_of_init(struct device_node *np) -{ - int ret = arm_smmu_init(); - - if (ret) - return ret; - - if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root)) - return -ENODEV; - - return 0; -} -IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init); - -#ifdef CONFIG_ACPI -static int __init acpi_smmu_v3_init(struct acpi_table_header *table) -{ - if (iort_node_match(ACPI_IORT_NODE_SMMU_V3)) - return arm_smmu_init(); - - return 0; -} -IORT_ACPI_DECLARE(arm_smmu_v3, ACPI_SIG_IORT, acpi_smmu_v3_init); -#endif +IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b493c99e17f7..7ec30b08b3bd 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -162,6 +162,7 @@ #define ARM_SMMU_GR0_sTLBGSTATUS 0x74 #define sTLBGSTATUS_GSACTIVE (1 << 0) #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ +#define TLB_SPIN_COUNT 10 /* Stream mapping registers */ #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) @@ -216,8 +217,7 @@ enum arm_smmu_s2cr_privcfg { #define CBA2R_VMID_MASK 0xffff /* Translation context bank */ -#define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) -#define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift)) +#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift)) #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_ACTLR 0x4 @@ -238,6 +238,8 @@ enum arm_smmu_s2cr_privcfg { #define ARM_SMMU_CB_S1_TLBIVAL 0x620 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_TLBSYNC 0x7f0 +#define ARM_SMMU_CB_TLBSTATUS 0x7f4 #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 @@ -344,7 +346,7 @@ struct arm_smmu_device { struct device *dev; void __iomem *base; - unsigned long size; + void __iomem *cb_base; unsigned long pgshift; #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0) @@ -404,18 +406,20 @@ enum arm_smmu_context_fmt { struct arm_smmu_cfg { u8 cbndx; u8 irptndx; + union { + u16 asid; + u16 vmid; + }; u32 cbar; enum arm_smmu_context_fmt fmt; }; #define INVALID_IRPTNDX 0xff -#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx) -#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1) - enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, + ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { @@ -569,49 +573,67 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) } /* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) { - int count = 0; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC); - while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS) - & sTLBGSTATUS_GSACTIVE) { - cpu_relax(); - if (++count == TLB_LOOP_TIMEOUT) { - dev_err_ratelimited(smmu->dev, - "TLB sync timed out -- SMMU may be deadlocked\n"); - return; + unsigned int spin_cnt, delay; + + writel_relaxed(0, sync); + for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { + for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { + if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) + return; + cpu_relax(); } - udelay(1); + udelay(delay); } + dev_err_ratelimited(smmu->dev, + "TLB sync timed out -- SMMU may be deadlocked\n"); } -static void arm_smmu_tlb_sync(void *cookie) +static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) +{ + void __iomem *base = ARM_SMMU_GR0(smmu); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC, + base + ARM_SMMU_GR0_sTLBGSTATUS); +} + +static void arm_smmu_tlb_sync_context(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC, + base + ARM_SMMU_CB_TLBSTATUS); } -static void arm_smmu_tlb_inv_context(void *cookie) +static void arm_smmu_tlb_sync_vmid(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + + arm_smmu_tlb_sync_global(smmu_domain->smmu); +} + +static void arm_smmu_tlb_inv_context_s1(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; - bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *base; + void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); - if (stage1) { - base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg), - base + ARM_SMMU_CB_S1_TLBIASID); - } else { - base = ARM_SMMU_GR0(smmu); - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), - base + ARM_SMMU_GR0_TLBIVMID); - } + writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + arm_smmu_tlb_sync_context(cookie); +} + +static void arm_smmu_tlb_inv_context_s2(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_GR0(smmu); - __arm_smmu_tlb_sync(smmu); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); + arm_smmu_tlb_sync_global(smmu); } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, @@ -619,31 +641,28 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *reg; + void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); if (stage1) { - reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova &= ~12UL; - iova |= ARM_SMMU_CB_ASID(smmu, cfg); + iova |= cfg->asid; do { writel_relaxed(iova, reg); iova += granule; } while (size -= granule); } else { iova >>= 12; - iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48; + iova |= (u64)cfg->asid << 48; do { writeq_relaxed(iova, reg); iova += granule >> 12; } while (size -= granule); } - } else if (smmu->version == ARM_SMMU_V2) { - reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + } else { reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; @@ -651,16 +670,40 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, smmu_write_atomic_lq(iova, reg); iova += granule >> 12; } while (size -= granule); - } else { - reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg); } } -static const struct iommu_gather_ops arm_smmu_gather_ops = { - .tlb_flush_all = arm_smmu_tlb_inv_context, +/* + * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears + * almost negligible, but the benefit of getting the first one in as far ahead + * of the sync as possible is significant, hence we don't just make this a + * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think. + */ +static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); +} + +static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s1, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, - .tlb_sync = arm_smmu_tlb_sync, + .tlb_sync = arm_smmu_tlb_sync_context, +}; + +static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync_context, +}; + +static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, + .tlb_sync = arm_smmu_tlb_sync_vmid, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -673,7 +716,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *cb_base; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); if (!(fsr & FSR_FAULT)) @@ -726,7 +769,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -735,7 +778,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, reg = CBA2R_RW64_32BIT; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT; + reg |= cfg->vmid << CBA2R_VMID_SHIFT; writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); } @@ -754,34 +797,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT; + reg |= cfg->vmid << CBAR_VMID_SHIFT; } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); - /* TTBRs */ - if (stage1) { - u16 asid = ARM_SMMU_CB_ASID(smmu, cfg); - - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); - reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); - writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); - } else { - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= (u64)asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); - reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= (u64)asid << TTBRn_ASID_SHIFT; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); - } - } else { - reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); - } - - /* TTBCR */ + /* + * TTBCR + * We must write this before the TTBRs, since it determines the + * access behaviour of some fields (in particular, ASID[15:8]). + */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { reg = pgtbl_cfg->arm_v7s_cfg.tcr; @@ -800,6 +824,27 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + /* TTBRs */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); + reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); + writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); + } else { + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); + reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); + } + } else { + reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); + } + /* MAIRs (stage-1 only) */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { @@ -833,11 +878,18 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + const struct iommu_gather_ops *tlb_ops; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) goto out_unlock; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; + smmu_domain->smmu = smmu; + goto out_unlock; + } + /* * Mapping the requested stage onto what we support is surprisingly * complicated, mainly because the spec allows S1+S2 SMMUs without @@ -904,6 +956,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } + tlb_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -922,12 +975,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 40UL); oas = min(oas, 40UL); } + if (smmu->version == ARM_SMMU_V2) + tlb_ops = &arm_smmu_s2_tlb_ops_v2; + else + tlb_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; goto out_unlock; } - ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); if (ret < 0) @@ -941,11 +997,16 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, cfg->irptndx = cfg->cbndx; } + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) + cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base; + else + cfg->asid = cfg->cbndx + smmu->cavium_id_base; + pgtbl_cfg = (struct io_pgtable_cfg) { .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, - .tlb = &arm_smmu_gather_ops, + .tlb = tlb_ops, .iommu_dev = smmu->dev, }; @@ -998,14 +1059,14 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) void __iomem *cb_base; int irq; - if (!smmu) + if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; /* * Disable the context bank and free the page tables before freeing * it. */ - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); if (cfg->irptndx != INVALID_IRPTNDX) { @@ -1021,7 +1082,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; /* * Allocate the domain and initialise some of its data structures. @@ -1250,10 +1313,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, { struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2cr *s2cr = smmu->s2crs; - enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS; u8 cbndx = smmu_domain->cfg.cbndx; + enum arm_smmu_s2cr_type type; int i, idx; + if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) + type = S2CR_TYPE_BYPASS; + else + type = S2CR_TYPE_TRANS; + for_each_cfg_sme(fwspec, i, idx) { if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx) continue; @@ -1356,7 +1424,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, u64 phys; unsigned long va; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; @@ -1391,6 +1459,9 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + if (domain->type == IOMMU_DOMAIN_IDENTITY) + return iova; + if (!ops) return 0; @@ -1467,7 +1538,7 @@ static int arm_smmu_add_device(struct device *dev) } if (mask & ~smmu->smr_mask_mask) { dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n", - sid, smmu->smr_mask_mask); + mask, smmu->smr_mask_mask); goto out_free; } } @@ -1549,6 +1620,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1564,6 +1638,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { @@ -1590,13 +1667,15 @@ out_unlock: static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { - u32 fwid = 0; + u32 mask, fwid = 0; if (args->args_count > 0) fwid |= (u16)args->args[0]; if (args->args_count > 1) fwid |= (u16)args->args[1] << SMR_MASK_SHIFT; + else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) + fwid |= (u16)mask << SMR_MASK_SHIFT; return iommu_fwspec_add_ids(dev, &fwid, 1); } @@ -1613,6 +1692,8 @@ static void arm_smmu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + + iommu_dma_get_resv_regions(dev, head); } static void arm_smmu_put_resv_regions(struct device *dev, @@ -1683,7 +1764,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); + cb_base = ARM_SMMU_CB(smmu, i); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); /* @@ -1729,7 +1810,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg |= sCR0_EXIDENABLE; /* Push the button */ - __arm_smmu_tlb_sync(smmu); + arm_smmu_tlb_sync_global(smmu); writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); } @@ -1863,11 +1944,11 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Check for size mismatch of SMMU address space from mapped region */ size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); - size *= 2 << smmu->pgshift; - if (smmu->size != size) + size <<= smmu->pgshift; + if (smmu->cb_base != gr0_base + size) dev_warn(smmu->dev, - "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", - size, smmu->size); + "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n", + size * 2, (smmu->cb_base - gr0_base) * 2); smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; @@ -1887,6 +1968,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) atomic_add_return(smmu->num_context_banks, &cavium_smmu_context_count); smmu->cavium_id_base -= smmu->num_context_banks; + dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n"); } /* ID2 */ @@ -2075,6 +2157,23 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return 0; } +static void arm_smmu_bus_init(void) +{ + /* Oh, for a proper bus abstraction */ + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &arm_smmu_ops); +#ifdef CONFIG_ARM_AMBA + if (!iommu_present(&amba_bustype)) + bus_set_iommu(&amba_bustype, &arm_smmu_ops); +#endif +#ifdef CONFIG_PCI + if (!iommu_present(&pci_bus_type)) { + pci_request_acs(); + bus_set_iommu(&pci_bus_type, &arm_smmu_ops); + } +#endif +} + static int arm_smmu_device_probe(struct platform_device *pdev) { struct resource *res; @@ -2103,7 +2202,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->base = devm_ioremap_resource(dev, res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); - smmu->size = resource_size(res); + smmu->cb_base = smmu->base + resource_size(res) / 2; num_irqs = 0; while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { @@ -2180,21 +2279,30 @@ static int arm_smmu_device_probe(struct platform_device *pdev) arm_smmu_device_reset(smmu); arm_smmu_test_smr_masks(smmu); - /* Oh, for a proper bus abstraction */ - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &arm_smmu_ops); -#ifdef CONFIG_ARM_AMBA - if (!iommu_present(&amba_bustype)) - bus_set_iommu(&amba_bustype, &arm_smmu_ops); -#endif -#ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) { - pci_request_acs(); - bus_set_iommu(&pci_bus_type, &arm_smmu_ops); - } -#endif + /* + * For ACPI and generic DT bindings, an SMMU will be probed before + * any device which might need it, so we want the bus ops in place + * ready to handle default domain setup as soon as any SMMU exists. + */ + if (!using_legacy_binding) + arm_smmu_bus_init(); + + return 0; +} + +/* + * With the legacy DT binding in play, though, we have no guarantees about + * probe order, but then we're also not doing default domains, so we can + * delay setting bus ops until we're sure every possible SMMU is ready, + * and that way ensure that no add_device() calls get missed. + */ +static int arm_smmu_legacy_bus_init(void) +{ + if (using_legacy_binding) + arm_smmu_bus_init(); return 0; } +device_initcall_sync(arm_smmu_legacy_bus_init); static int arm_smmu_device_remove(struct platform_device *pdev) { @@ -2219,56 +2327,14 @@ static struct platform_driver arm_smmu_driver = { .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, }; - -static int __init arm_smmu_init(void) -{ - static bool registered; - int ret = 0; - - if (!registered) { - ret = platform_driver_register(&arm_smmu_driver); - registered = !ret; - } - return ret; -} - -static void __exit arm_smmu_exit(void) -{ - return platform_driver_unregister(&arm_smmu_driver); -} - -subsys_initcall(arm_smmu_init); -module_exit(arm_smmu_exit); - -static int __init arm_smmu_of_init(struct device_node *np) -{ - int ret = arm_smmu_init(); - - if (ret) - return ret; - - if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root)) - return -ENODEV; - - return 0; -} -IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init); -IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init); - -#ifdef CONFIG_ACPI -static int __init arm_smmu_acpi_init(struct acpi_table_header *table) -{ - if (iort_node_match(ACPI_IORT_NODE_SMMU)) - return arm_smmu_init(); - - return 0; -} -IORT_ACPI_DECLARE(arm_smmu, ACPI_SIG_IORT, arm_smmu_acpi_init); -#endif +module_platform_driver(arm_smmu_driver); + +IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL); +IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL); +IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL); +IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL); +IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL); +IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>"); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 48d36ce59efb..8348f366ddd1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -61,15 +61,6 @@ static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) return PAGE_SIZE; } -static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain) -{ - struct iommu_dma_cookie *cookie = domain->iova_cookie; - - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) - return &cookie->iovad; - return NULL; -} - static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) { struct iommu_dma_cookie *cookie; @@ -167,22 +158,99 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) } EXPORT_SYMBOL(iommu_put_dma_cookie); -static void iova_reserve_pci_windows(struct pci_dev *dev, - struct iova_domain *iovad) +/** + * iommu_dma_get_resv_regions - Reserved region driver helper + * @dev: Device from iommu_get_resv_regions() + * @list: Reserved region list from iommu_get_resv_regions() + * + * IOMMU drivers can use this to implement their .get_resv_regions callback + * for general non-IOMMU-specific reservations. Currently, this covers host + * bridge windows for PCI devices. + */ +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { - struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); + struct pci_host_bridge *bridge; struct resource_entry *window; - unsigned long lo, hi; + if (!dev_is_pci(dev)) + return; + + bridge = pci_find_host_bridge(to_pci_dev(dev)->bus); resource_list_for_each_entry(window, &bridge->windows) { - if (resource_type(window->res) != IORESOURCE_MEM && - resource_type(window->res) != IORESOURCE_IO) + struct iommu_resv_region *region; + phys_addr_t start; + size_t length; + + if (resource_type(window->res) != IORESOURCE_MEM) + continue; + + start = window->res->start - window->offset; + length = window->res->end - window->res->start + 1; + region = iommu_alloc_resv_region(start, length, 0, + IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, list); + } +} +EXPORT_SYMBOL(iommu_dma_get_resv_regions); + +static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, + phys_addr_t start, phys_addr_t end) +{ + struct iova_domain *iovad = &cookie->iovad; + struct iommu_dma_msi_page *msi_page; + int i, num_pages; + + start -= iova_offset(iovad, start); + num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); + + msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + for (i = 0; i < num_pages; i++) { + msi_page[i].phys = start; + msi_page[i].iova = start; + INIT_LIST_HEAD(&msi_page[i].list); + list_add(&msi_page[i].list, &cookie->msi_page_list); + start += iovad->granule; + } + + return 0; +} + +static int iova_reserve_iommu_regions(struct device *dev, + struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct iommu_resv_region *region; + LIST_HEAD(resv_regions); + int ret = 0; + + iommu_get_resv_regions(dev, &resv_regions); + list_for_each_entry(region, &resv_regions, list) { + unsigned long lo, hi; + + /* We ARE the software that manages these! */ + if (region->type == IOMMU_RESV_SW_MSI) continue; - lo = iova_pfn(iovad, window->res->start - window->offset); - hi = iova_pfn(iovad, window->res->end - window->offset); + lo = iova_pfn(iovad, region->start); + hi = iova_pfn(iovad, region->start + region->length - 1); reserve_iova(iovad, lo, hi); + + if (region->type == IOMMU_RESV_MSI) + ret = cookie_init_hw_msi_region(cookie, region->start, + region->start + region->length); + if (ret) + break; } + iommu_put_resv_regions(dev, &resv_regions); + + return ret; } /** @@ -203,7 +271,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long order, base_pfn, end_pfn; - bool pci = dev && dev_is_pci(dev); if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -233,7 +300,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * leave the cache limit at the top of their range to save an rb_last() * traversal on every allocation. */ - if (pci) + if (dev && dev_is_pci(dev)) end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */ @@ -248,12 +315,15 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * area cache limit down for the benefit of the smaller one. */ iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); - } else { - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); - if (pci) - iova_reserve_pci_windows(to_pci_dev(dev), iovad); + + return 0; } - return 0; + + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + if (!dev) + return 0; + + return iova_reserve_iommu_regions(dev, domain); } EXPORT_SYMBOL(iommu_dma_init_domain); @@ -286,48 +356,67 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent, } } -static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, - dma_addr_t dma_limit, struct device *dev) +static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, + size_t size, dma_addr_t dma_limit, struct device *dev) { - struct iova_domain *iovad = cookie_iovad(domain); - unsigned long shift = iova_shift(iovad); - unsigned long length = iova_align(iovad, size) >> shift; - struct iova *iova = NULL; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len, iova = 0; + + if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + cookie->msi_iova += size; + return cookie->msi_iova - size; + } + + shift = iova_shift(iovad); + iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); if (domain->geometry.force_aperture) dma_limit = min(dma_limit, domain->geometry.aperture_end); /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova(iovad, length, DMA_BIT_MASK(32) >> shift, - true); - /* - * Enforce size-alignment to be safe - there could perhaps be an - * attribute to control this per-device, or at least per-domain... - */ + iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); + if (!iova) - iova = alloc_iova(iovad, length, dma_limit >> shift, true); + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); - return iova; + return (dma_addr_t)iova << shift; } -/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ -static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size) { - struct iova_domain *iovad = cookie_iovad(domain); + struct iova_domain *iovad = &cookie->iovad; unsigned long shift = iova_shift(iovad); - unsigned long pfn = dma_addr >> shift; - struct iova *iova = find_iova(iovad, pfn); - size_t size; - if (WARN_ON(!iova)) - return; + /* The MSI case is only ever cleaning up its most recent allocation */ + if (cookie->type == IOMMU_DMA_MSI_COOKIE) + cookie->msi_iova -= size; + else + free_iova_fast(iovad, iova >> shift, size >> shift); +} + +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, + size_t size) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = iova_offset(iovad, dma_addr); + + dma_addr -= iova_off; + size = iova_align(iovad, size + iova_off); - size = iova_size(iova) << shift; - size -= iommu_unmap(domain, pfn << shift, size); - /* ...and if we can't, then something is horribly, horribly wrong */ - WARN_ON(size > 0); - __free_iova(iovad, iova); + WARN_ON(iommu_unmap(domain, dma_addr, size) != size); + iommu_dma_free_iova(cookie, dma_addr, size); } static void __iommu_dma_free_pages(struct page **pages, int count) @@ -409,7 +498,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); *handle = DMA_ERROR_CODE; } @@ -437,11 +526,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, void (*flush_page)(struct device *, const void *, phys_addr_t)) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; struct page **pages; struct sg_table sgt; - dma_addr_t dma_addr; + dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; *handle = DMA_ERROR_CODE; @@ -461,11 +550,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, if (!pages) return NULL; - iova = __alloc_iova(domain, size, dev->coherent_dma_mask, dev); + size = iova_align(iovad, size); + iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev); if (!iova) goto out_free_pages; - size = iova_align(iovad, size); if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) goto out_free_iova; @@ -481,19 +570,18 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, sg_miter_stop(&miter); } - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot) < size) goto out_free_sg; - *handle = dma_addr; + *handle = iova; sg_free_table(&sgt); return pages; out_free_sg: sg_free_table(&sgt); out_free_iova: - __free_iova(iovad, iova); + iommu_dma_free_iova(cookie, iova, size); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -527,22 +615,22 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot) { - dma_addr_t dma_addr; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; size_t iova_off = iova_offset(iovad, phys); - size_t len = iova_align(iovad, size + iova_off); - struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev), dev); + dma_addr_t iova; + size = iova_align(iovad, size + iova_off); + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) return DMA_ERROR_CODE; - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { - __free_iova(iovad, iova); + if (iommu_map(domain, iova, phys - iova_off, size, prot)) { + iommu_dma_free_iova(cookie, iova, size); return DMA_ERROR_CODE; } - return dma_addr + iova_off; + return iova + iova_off; } dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, @@ -554,7 +642,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); } /* @@ -643,10 +731,10 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int prot) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; - dma_addr_t dma_addr; + dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); int i; @@ -690,7 +778,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } - iova = __alloc_iova(domain, iova_len, dma_get_mask(dev), dev); + iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); if (!iova) goto out_restore_sg; @@ -698,14 +786,13 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * We'll leave any physical concatenation to the IOMMU driver's * implementation - it knows better than we do. */ - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len) goto out_free_iova; - return __finalise_sg(dev, sg, nents, dma_addr); + return __finalise_sg(dev, sg, nents, iova); out_free_iova: - __free_iova(iovad, iova); + iommu_dma_free_iova(cookie, iova, iova_len); out_restore_sg: __invalidate_sg(sg, nents); return 0; @@ -714,11 +801,21 @@ out_restore_sg: void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { + dma_addr_t start, end; + struct scatterlist *tmp; + int i; /* * The scatterlist segments are mapped into a single * contiguous IOVA allocation, so this is incredibly easy. */ - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); + start = sg_dma_address(sg); + for_each_sg(sg_next(sg), tmp, nents - 1, i) { + if (sg_dma_len(tmp) == 0) + break; + sg = tmp; + } + end = sg_dma_address(sg) + sg_dma_len(sg); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); } dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, @@ -731,7 +828,7 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); } int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -744,8 +841,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi_page; - struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; size_t size = cookie_msi_granule(cookie); @@ -758,29 +854,16 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - msi_page->phys = msi_addr; - if (iovad) { - iova = __alloc_iova(domain, size, dma_get_mask(dev), dev); - if (!iova) - goto out_free_page; - msi_page->iova = iova_dma_addr(iovad, iova); - } else { - msi_page->iova = cookie->msi_iova; - cookie->msi_iova += size; - } - - if (iommu_map(domain, msi_page->iova, msi_addr, size, prot)) - goto out_free_iova; + iova = __iommu_dma_map(dev, msi_addr, size, prot); + if (iommu_dma_mapping_error(dev, iova)) + goto out_free_page; INIT_LIST_HEAD(&msi_page->list); + msi_page->phys = msi_addr; + msi_page->iova = iova; list_add(&msi_page->list, &cookie->msi_page_list); return msi_page; -out_free_iova: - if (iovad) - __free_iova(iovad, iova); - else - cookie->msi_iova -= size; out_free_page: kfree(msi_page); return NULL; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 36e3f430d265..cbf7763d8091 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -311,7 +311,7 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) ((void *)drhd) + drhd->header.length, dmaru->segment, dmaru->devices, dmaru->devices_cnt); - if (ret != 0) + if (ret) break; } if (ret >= 0) @@ -391,7 +391,7 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; - int ret = 0; + int ret; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru = dmar_find_dmaru(drhd); @@ -551,17 +551,16 @@ static int __init dmar_table_detect(void) status = AE_NOT_FOUND; } - return (ACPI_SUCCESS(status) ? 1 : 0); + return ACPI_SUCCESS(status) ? 0 : -ENOENT; } static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, size_t len, struct dmar_res_callback *cb) { - int ret = 0; struct acpi_dmar_header *iter, *next; struct acpi_dmar_header *end = ((void *)start) + len; - for (iter = start; iter < end && ret == 0; iter = next) { + for (iter = start; iter < end; iter = next) { next = (void *)iter + iter->length; if (iter->length == 0) { /* Avoid looping forever on bad ACPI tables */ @@ -570,8 +569,7 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, } else if (next > end) { /* Avoid passing table end */ pr_warn(FW_BUG "Record passes table end\n"); - ret = -EINVAL; - break; + return -EINVAL; } if (cb->print_entry) @@ -582,15 +580,19 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, pr_debug("Unknown DMAR structure type %d\n", iter->type); } else if (cb->cb[iter->type]) { + int ret; + ret = cb->cb[iter->type](iter, cb->arg[iter->type]); + if (ret) + return ret; } else if (!cb->ignore_unhandled) { pr_warn("No handler for DMAR structure type %d\n", iter->type); - ret = -EINVAL; + return -EINVAL; } } - return ret; + return 0; } static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar, @@ -607,8 +609,8 @@ static int __init parse_dmar_table(void) { struct acpi_table_dmar *dmar; - int ret = 0; int drhd_count = 0; + int ret; struct dmar_res_callback cb = { .print_entry = true, .ignore_unhandled = true, @@ -891,17 +893,17 @@ int __init detect_intel_iommu(void) down_write(&dmar_global_lock); ret = dmar_table_detect(); - if (ret) - ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, - &validate_drhd_cb); - if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { + if (!ret) + ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, + &validate_drhd_cb); + if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; /* Make sure ACS will be enabled */ pci_request_acs(); } #ifdef CONFIG_X86 - if (ret) + if (!ret) x86_init.iommu.iommu_init = intel_iommu_init; #endif @@ -911,10 +913,9 @@ int __init detect_intel_iommu(void) } up_write(&dmar_global_lock); - return ret ? 1 : -ENODEV; + return ret ? ret : 1; } - static void unmap_iommu(struct intel_iommu *iommu) { iounmap(iommu->reg); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c01bfcdb2383..2395478dde75 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -171,6 +171,9 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define REG_V5_PT_BASE_PFN 0x00C #define REG_V5_MMU_FLUSH_ALL 0x010 #define REG_V5_MMU_FLUSH_ENTRY 0x014 +#define REG_V5_MMU_FLUSH_RANGE 0x018 +#define REG_V5_MMU_FLUSH_START 0x020 +#define REG_V5_MMU_FLUSH_END 0x024 #define REG_V5_INT_STATUS 0x060 #define REG_V5_INT_CLEAR 0x064 #define REG_V5_FAULT_AR_VA 0x070 @@ -319,14 +322,23 @@ static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, { unsigned int i; - for (i = 0; i < num_inv; i++) { - if (MMU_MAJ_VER(data->version) < 5) + if (MMU_MAJ_VER(data->version) < 5) { + for (i = 0; i < num_inv; i++) { writel((iova & SPAGE_MASK) | 1, data->sfrbase + REG_MMU_FLUSH_ENTRY); - else + iova += SPAGE_SIZE; + } + } else { + if (num_inv == 1) { writel((iova & SPAGE_MASK) | 1, data->sfrbase + REG_V5_MMU_FLUSH_ENTRY); - iova += SPAGE_SIZE; + } else { + writel((iova & SPAGE_MASK), + data->sfrbase + REG_V5_MMU_FLUSH_START); + writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE, + data->sfrbase + REG_V5_MMU_FLUSH_END); + writel(1, data->sfrbase + REG_V5_MMU_FLUSH_RANGE); + } } } @@ -747,16 +759,8 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) goto err_counter; /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ - for (i = 0; i < NUM_LV1ENTRIES; i += 8) { - domain->pgtable[i + 0] = ZERO_LV2LINK; - domain->pgtable[i + 1] = ZERO_LV2LINK; - domain->pgtable[i + 2] = ZERO_LV2LINK; - domain->pgtable[i + 3] = ZERO_LV2LINK; - domain->pgtable[i + 4] = ZERO_LV2LINK; - domain->pgtable[i + 5] = ZERO_LV2LINK; - domain->pgtable[i + 6] = ZERO_LV2LINK; - domain->pgtable[i + 7] = ZERO_LV2LINK; - } + for (i = 0; i < NUM_LV1ENTRIES; i++) + domain->pgtable[i] = ZERO_LV2LINK; handle = dma_map_single(dma_dev, domain->pgtable, LV1TABLE_SIZE, DMA_TO_DEVICE); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index aab723f91f12..c3434f29c967 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -20,6 +20,7 @@ #define __FSL_PAMU_H #include <linux/iommu.h> +#include <linux/pci.h> #include <asm/fsl_pamu_stash.h> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d412a313a372..90ab0115d78e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -183,6 +183,7 @@ static int rwbf_quirk; * (used when kernel is launched w/ TXT) */ static int force_on = 0; +int intel_iommu_tboot_noforce; /* * 0: Present @@ -607,6 +608,10 @@ static int __init intel_iommu_setup(char *str) "Intel-IOMMU: enable pre-production PASID support\n"); intel_iommu_pasid28 = 1; iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "tboot_noforce", 13)) { + printk(KERN_INFO + "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + intel_iommu_tboot_noforce = 1; } str += strcspn(str, ","); @@ -4730,6 +4735,15 @@ static int intel_iommu_cpu_dead(unsigned int cpu) return 0; } +static void intel_disable_iommus(void) +{ + struct intel_iommu *iommu = NULL; + struct dmar_drhd_unit *drhd; + + for_each_iommu(iommu, drhd) + iommu_disable_translation(iommu); +} + static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { return container_of(dev, struct intel_iommu, iommu.dev); @@ -4840,8 +4854,28 @@ int __init intel_iommu_init(void) goto out_free_dmar; } - if (no_iommu || dmar_disabled) + if (no_iommu || dmar_disabled) { + /* + * We exit the function here to ensure IOMMU's remapping and + * mempool aren't setup, which means that the IOMMU's PMRs + * won't be disabled via the call to init_dmars(). So disable + * it explicitly here. The PMRs were setup by tboot prior to + * calling SENTER, but the kernel is expected to reset/tear + * down the PMRs. + */ + if (intel_iommu_tboot_noforce) { + for_each_iommu(iommu, drhd) + iommu_disable_protect_mem_regions(iommu); + } + + /* + * Make sure the IOMMUs are switched off, even when we + * boot into a kexec kernel and the previous kernel left + * them enabled + */ + intel_disable_iommus(); goto out_free_dmar; + } if (list_empty(&dmar_rmrr_units)) pr_info("No RMRR found\n"); diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index ac596928f6b4..a190cbd76ef7 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -408,14 +408,6 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) size_t size; u64 irta; - if (!is_kdump_kernel()) { - pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", - iommu->name); - clear_ir_pre_enabled(iommu); - iommu_disable_irq_remapping(iommu); - return -EINVAL; - } - /* Check whether the old ir-table has the same size as ours */ irta = dmar_readq(iommu->reg + DMAR_IRTA_REG); if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK) @@ -567,7 +559,12 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) init_ir_status(iommu); if (ir_pre_enabled(iommu)) { - if (iommu_load_old_irte(iommu)) + if (!is_kdump_kernel()) { + pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", + iommu->name); + clear_ir_pre_enabled(iommu); + iommu_disable_irq_remapping(iommu); + } else if (iommu_load_old_irte(iommu)) pr_err("Failed to copy IR table for %s from previous kernel\n", iommu->name); else diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f9bc6ebb8140..6e5df5e0a3bd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -74,7 +74,7 @@ /* Calculate the block/page mapping size at level l for pagetable in d. */ #define ARM_LPAE_BLOCK_SIZE(l,d) \ - (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \ ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) /* Page table bits */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3b67144dead2..cf7ca7e70777 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,6 +36,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; struct iommu_callback_data { const struct iommu_ops *ops; @@ -112,6 +113,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, static void __iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group); +static int __init iommu_set_def_domain_type(char *str) +{ + bool pt; + + if (!str || strtobool(str, &pt)) + return -EINVAL; + + iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; + return 0; +} +early_param("iommu.passthrough", iommu_set_def_domain_type); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1015,10 +1028,19 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) * IOMMU driver. */ if (!group->default_domain) { - group->default_domain = __iommu_domain_alloc(dev->bus, - IOMMU_DOMAIN_DMA); + struct iommu_domain *dom; + + dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); + if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { + dev_warn(dev, + "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", + iommu_def_domain_type); + dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); + } + + group->default_domain = dom; if (!group->domain) - group->domain = group->default_domain; + group->domain = dom; } ret = iommu_group_add_device(group, dev); @@ -1083,8 +1105,12 @@ static int iommu_bus_notifier(struct notifier_block *nb, * result in ADD/DEL notifiers to group->notifier */ if (action == BUS_NOTIFY_ADD_DEVICE) { - if (ops->add_device) - return ops->add_device(dev); + if (ops->add_device) { + int ret; + + ret = ops->add_device(dev); + return (ret) ? NOTIFY_DONE : NOTIFY_OK; + } } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { if (ops->remove_device && dev->iommu_group) { ops->remove_device(dev); @@ -1652,6 +1678,48 @@ void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) } EXPORT_SYMBOL_GPL(iommu_domain_window_disable); +/** + * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework + * @domain: the iommu domain where the fault has happened + * @dev: the device where the fault has happened + * @iova: the faulting address + * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) + * + * This function should be called by the low-level IOMMU implementations + * whenever IOMMU faults happen, to allow high-level users, that are + * interested in such events, to know about them. + * + * This event may be useful for several possible use cases: + * - mere logging of the event + * - dynamic TLB/PTE loading + * - if restarting of the faulting device is required + * + * Returns 0 on success and an appropriate error code otherwise (if dynamic + * PTE/TLB loading will one day be supported, implementations will be able + * to tell whether it succeeded or not according to this return value). + * + * Specifically, -ENOSYS is returned if a fault handler isn't installed + * (though fault handlers can also return -ENOSYS, in case they want to + * elicit the default behavior of the IOMMU drivers). + */ +int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags) +{ + int ret = -ENOSYS; + + /* + * if upper layers showed interest and installed a fault handler, + * invoke it. + */ + if (domain->handler) + ret = domain->handler(domain, dev, iova, flags, + domain->handler_token); + + trace_io_page_fault(dev, iova, flags); + return ret; +} +EXPORT_SYMBOL_GPL(report_iommu_fault); + static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index e80a4105ac2a..5c88ba70e4e0 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -166,7 +166,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, break; /* found a free slot */ } adjust_limit_pfn: - limit_pfn = curr_iova->pfn_lo - 1; + limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0; move_left: prev = curr; curr = rb_prev(curr); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 19e010083408..a27ef570c328 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -431,9 +431,10 @@ err_release_mapping: static int mtk_iommu_add_device(struct device *dev) { - struct iommu_group *group; struct of_phandle_args iommu_spec; struct of_phandle_iterator it; + struct mtk_iommu_data *data; + struct iommu_group *group; int err; of_for_each_phandle(&it, err, dev->of_node, "iommus", @@ -450,6 +451,9 @@ static int mtk_iommu_add_device(struct device *dev) if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) return -ENODEV; /* Not a iommu client device */ + data = dev->iommu_fwspec->iommu_priv; + iommu_device_link(&data->iommu, dev); + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); @@ -460,9 +464,14 @@ static int mtk_iommu_add_device(struct device *dev) static void mtk_iommu_remove_device(struct device *dev) { + struct mtk_iommu_data *data; + if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) return; + data = dev->iommu_fwspec->iommu_priv; + iommu_device_unlink(&data->iommu, dev); + iommu_group_remove_device(dev); iommu_fwspec_free(dev); } @@ -627,6 +636,17 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; + ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; + + iommu_device_set_ops(&data->iommu, &mtk_iommu_ops); + + ret = iommu_device_register(&data->iommu); + if (ret) + return ret; + if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); @@ -637,6 +657,9 @@ static int mtk_iommu_remove(struct platform_device *pdev) { struct mtk_iommu_data *data = platform_get_drvdata(pdev); + iommu_device_sysfs_remove(&data->iommu); + iommu_device_unregister(&data->iommu); + if (iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, NULL); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 2683e9fc0dcf..9f44ee8ea1bc 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -96,6 +96,49 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index, } EXPORT_SYMBOL_GPL(of_get_dma_window); +static bool of_iommu_driver_present(struct device_node *np) +{ + /* + * If the IOMMU still isn't ready by the time we reach init, assume + * it never will be. We don't want to defer indefinitely, nor attempt + * to dereference __iommu_of_table after it's been freed. + */ + if (system_state > SYSTEM_BOOTING) + return false; + + return of_match_node(&__iommu_of_table, np); +} + +static const struct iommu_ops +*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) +{ + const struct iommu_ops *ops; + struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; + int err; + + ops = iommu_ops_from_fwnode(fwnode); + if ((ops && !ops->of_xlate) || + (!ops && !of_iommu_driver_present(iommu_spec->np))) + return NULL; + + err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); + if (err) + return ERR_PTR(err); + /* + * The otherwise-empty fwspec handily serves to indicate the specific + * IOMMU device we're waiting for, which will be useful if we ever get + * a proper probe-ordering dependency mechanism in future. + */ + if (!ops) + return ERR_PTR(-EPROBE_DEFER); + + err = ops->of_xlate(dev, iommu_spec); + if (err) + return ERR_PTR(err); + + return ops; +} + static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) { struct of_phandle_args *iommu_spec = data; @@ -105,10 +148,11 @@ static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) } static const struct iommu_ops -*of_pci_iommu_configure(struct pci_dev *pdev, struct device_node *bridge_np) +*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np) { const struct iommu_ops *ops; struct of_phandle_args iommu_spec; + int err; /* * Start by tracing the RID alias down the PCI topology as @@ -123,56 +167,76 @@ static const struct iommu_ops * bus into the system beyond, and which IOMMU it ends up at. */ iommu_spec.np = NULL; - if (of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", - "iommu-map-mask", &iommu_spec.np, iommu_spec.args)) - return NULL; + err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", + "iommu-map-mask", &iommu_spec.np, + iommu_spec.args); + if (err) + return err == -ENODEV ? NULL : ERR_PTR(err); - ops = iommu_ops_from_fwnode(&iommu_spec.np->fwnode); - if (!ops || !ops->of_xlate || - iommu_fwspec_init(&pdev->dev, &iommu_spec.np->fwnode, ops) || - ops->of_xlate(&pdev->dev, &iommu_spec)) - ops = NULL; + ops = of_iommu_xlate(&pdev->dev, &iommu_spec); of_node_put(iommu_spec.np); return ops; } -const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np) +static const struct iommu_ops +*of_platform_iommu_init(struct device *dev, struct device_node *np) { struct of_phandle_args iommu_spec; - struct device_node *np; const struct iommu_ops *ops = NULL; int idx = 0; - if (dev_is_pci(dev)) - return of_pci_iommu_configure(to_pci_dev(dev), master_np); - /* * We don't currently walk up the tree looking for a parent IOMMU. * See the `Notes:' section of * Documentation/devicetree/bindings/iommu/iommu.txt */ - while (!of_parse_phandle_with_args(master_np, "iommus", - "#iommu-cells", idx, - &iommu_spec)) { - np = iommu_spec.np; - ops = iommu_ops_from_fwnode(&np->fwnode); - - if (!ops || !ops->of_xlate || - iommu_fwspec_init(dev, &np->fwnode, ops) || - ops->of_xlate(dev, &iommu_spec)) - goto err_put_node; - - of_node_put(np); + while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", + idx, &iommu_spec)) { + ops = of_iommu_xlate(dev, &iommu_spec); + of_node_put(iommu_spec.np); idx++; + if (IS_ERR_OR_NULL(ops)) + break; } return ops; +} + +const struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np) +{ + const struct iommu_ops *ops; + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + + if (!master_np) + return NULL; + + if (fwspec) { + if (fwspec->ops) + return fwspec->ops; + + /* In the deferred case, start again from scratch */ + iommu_fwspec_free(dev); + } -err_put_node: - of_node_put(np); - return NULL; + if (dev_is_pci(dev)) + ops = of_pci_iommu_init(to_pci_dev(dev), master_np); + else + ops = of_platform_iommu_init(dev, master_np); + /* + * If we have reason to believe the IOMMU driver missed the initial + * add_device callback for dev, replay it to get things in order. + */ + if (!IS_ERR_OR_NULL(ops) && ops->add_device && + dev->bus && !dev->iommu_group) { + int err = ops->add_device(dev); + + if (err) + ops = ERR_PTR(err); + } + + return ops; } static int __init of_iommu_init(void) @@ -183,7 +247,7 @@ static int __init of_iommu_init(void) for_each_matching_node_and_match(np, matches, &match) { const of_iommu_init_fn init_fn = match->data; - if (init_fn(np)) + if (init_fn && init_fn(np)) pr_err("Failed to initialise IOMMU %s\n", of_node_full_name(np)); } diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index e2583cce2cc1..95dfca36ccb9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -36,28 +36,14 @@ #include "omap-iopgtable.h" #include "omap-iommu.h" +static const struct iommu_ops omap_iommu_ops; + #define to_iommu(dev) \ ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) /* bitmap of the page sizes currently supported */ #define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -/** - * struct omap_iommu_domain - omap iommu domain - * @pgtable: the page table - * @iommu_dev: an omap iommu device attached to this domain. only a single - * iommu device can be attached for now. - * @dev: Device using this domain. - * @lock: domain lock, should be taken when attaching/detaching - */ -struct omap_iommu_domain { - u32 *pgtable; - struct omap_iommu *iommu_dev; - struct device *dev; - spinlock_t lock; - struct iommu_domain domain; -}; - #define MMU_LOCK_BASE_SHIFT 10 #define MMU_LOCK_BASE_MASK (0x1f << MMU_LOCK_BASE_SHIFT) #define MMU_LOCK_BASE(x) \ @@ -818,33 +804,14 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) return IRQ_NONE; } -static int device_match_by_alias(struct device *dev, void *data) -{ - struct omap_iommu *obj = to_iommu(dev); - const char *name = data; - - pr_debug("%s: %s %s\n", __func__, obj->name, name); - - return strcmp(obj->name, name) == 0; -} - /** * omap_iommu_attach() - attach iommu device to an iommu domain - * @name: name of target omap iommu device + * @obj: target omap iommu device * @iopgd: page table **/ -static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) +static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) { int err; - struct device *dev; - struct omap_iommu *obj; - - dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name, - device_match_by_alias); - if (!dev) - return ERR_PTR(-ENODEV); - - obj = to_iommu(dev); spin_lock(&obj->iommu_lock); @@ -857,11 +824,13 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) spin_unlock(&obj->iommu_lock); dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); - return obj; + + return 0; err_enable: spin_unlock(&obj->iommu_lock); - return ERR_PTR(err); + + return err; } /** @@ -928,28 +897,26 @@ static int omap_iommu_probe(struct platform_device *pdev) int irq; struct omap_iommu *obj; struct resource *res; - struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device_node *of = pdev->dev.of_node; + if (!of) { + pr_err("%s: only DT-based devices are supported\n", __func__); + return -ENODEV; + } + obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - if (of) { - obj->name = dev_name(&pdev->dev); - obj->nr_tlb_entries = 32; - err = of_property_read_u32(of, "ti,#tlb-entries", - &obj->nr_tlb_entries); - if (err && err != -EINVAL) - return err; - if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) - return -EINVAL; - if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) - obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; - } else { - obj->nr_tlb_entries = pdata->nr_tlb_entries; - obj->name = pdata->name; - } + obj->name = dev_name(&pdev->dev); + obj->nr_tlb_entries = 32; + err = of_property_read_u32(of, "ti,#tlb-entries", &obj->nr_tlb_entries); + if (err && err != -EINVAL) + return err; + if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) + return -EINVAL; + if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); @@ -976,19 +943,46 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); + obj->group = iommu_group_alloc(); + if (IS_ERR(obj->group)) + return PTR_ERR(obj->group); + + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); + if (err) + goto out_group; + + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + + err = iommu_device_register(&obj->iommu); + if (err) + goto out_sysfs; + pm_runtime_irq_safe(obj->dev); pm_runtime_enable(obj->dev); omap_iommu_debugfs_add(obj); dev_info(&pdev->dev, "%s registered\n", obj->name); + return 0; + +out_sysfs: + iommu_device_sysfs_remove(&obj->iommu); +out_group: + iommu_group_put(obj->group); + return err; } static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); + iommu_group_put(obj->group); + obj->group = NULL; + + iommu_device_sysfs_remove(&obj->iommu); + iommu_device_unregister(&obj->iommu); + omap_iommu_debugfs_remove(obj); pm_runtime_disable(obj->dev); @@ -1077,11 +1071,11 @@ static int omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu; struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *oiommu; int ret = 0; - if (!arch_data || !arch_data->name) { + if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); return -EINVAL; } @@ -1095,15 +1089,16 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out; } + oiommu = arch_data->iommu_dev; + /* get a handle to and enable the omap iommu */ - oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable); - if (IS_ERR(oiommu)) { - ret = PTR_ERR(oiommu); + ret = omap_iommu_attach(oiommu, omap_domain->pgtable); + if (ret) { dev_err(dev, "can't get omap iommu: %d\n", ret); goto out; } - omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + omap_domain->iommu_dev = oiommu; omap_domain->dev = dev; oiommu->domain = domain; @@ -1116,7 +1111,6 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, struct device *dev) { struct omap_iommu *oiommu = dev_to_omap_iommu(dev); - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; /* only a single device is supported per domain for now */ if (omap_domain->iommu_dev != oiommu) { @@ -1128,7 +1122,7 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_iommu_detach(oiommu); - omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + omap_domain->iommu_dev = NULL; omap_domain->dev = NULL; oiommu->domain = NULL; } @@ -1232,8 +1226,11 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, static int omap_iommu_add_device(struct device *dev) { struct omap_iommu_arch_data *arch_data; + struct omap_iommu *oiommu; + struct iommu_group *group; struct device_node *np; struct platform_device *pdev; + int ret; /* * Allocate the archdata iommu structure for DT-based devices. @@ -1254,15 +1251,41 @@ static int omap_iommu_add_device(struct device *dev) return -EINVAL; } + oiommu = platform_get_drvdata(pdev); + if (!oiommu) { + of_node_put(np); + return -EINVAL; + } + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); if (!arch_data) { of_node_put(np); return -ENOMEM; } - arch_data->name = kstrdup(dev_name(&pdev->dev), GFP_KERNEL); + ret = iommu_device_link(&oiommu->iommu, dev); + if (ret) { + kfree(arch_data); + of_node_put(np); + return ret; + } + + arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; + /* + * IOMMU group initialization calls into omap_iommu_device_group, which + * needs a valid dev->archdata.iommu pointer + */ + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) { + iommu_device_unlink(&oiommu->iommu, dev); + dev->archdata.iommu = NULL; + kfree(arch_data); + return PTR_ERR(group); + } + iommu_group_put(group); + of_node_put(np); return 0; @@ -1275,8 +1298,23 @@ static void omap_iommu_remove_device(struct device *dev) if (!dev->of_node || !arch_data) return; - kfree(arch_data->name); + iommu_device_unlink(&arch_data->iommu_dev->iommu, dev); + iommu_group_remove_device(dev); + + dev->archdata.iommu = NULL; kfree(arch_data); + +} + +static struct iommu_group *omap_iommu_device_group(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct iommu_group *group = NULL; + + if (arch_data->iommu_dev) + group = arch_data->iommu_dev->group; + + return group; } static const struct iommu_ops omap_iommu_ops = { @@ -1290,6 +1328,7 @@ static const struct iommu_ops omap_iommu_ops = { .iova_to_phys = omap_iommu_iova_to_phys, .add_device = omap_iommu_add_device, .remove_device = omap_iommu_remove_device, + .device_group = omap_iommu_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, }; @@ -1299,6 +1338,7 @@ static int __init omap_iommu_init(void) const unsigned long flags = SLAB_HWCACHE_ALIGN; size_t align = 1 << 10; /* L2 pagetable alignement */ struct device_node *np; + int ret; np = of_find_matching_node(NULL, omap_iommu_of_match); if (!np) @@ -1312,11 +1352,25 @@ static int __init omap_iommu_init(void) return -ENOMEM; iopte_cachep = p; - bus_set_iommu(&platform_bus_type, &omap_iommu_ops); - omap_iommu_debugfs_init(); - return platform_driver_register(&omap_iommu_driver); + ret = platform_driver_register(&omap_iommu_driver); + if (ret) { + pr_err("%s: failed to register driver\n", __func__); + goto fail_driver; + } + + ret = bus_set_iommu(&platform_bus_type, &omap_iommu_ops); + if (ret) + goto fail_bus; + + return 0; + +fail_bus: + platform_driver_unregister(&omap_iommu_driver); +fail_driver: + kmem_cache_destroy(iopte_cachep); + return ret; } subsys_initcall(omap_iommu_init); /* must be ready before omap3isp is probed */ diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 59628e5017b4..6e70515e6038 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -14,6 +14,7 @@ #define _OMAP_IOMMU_H #include <linux/bitops.h> +#include <linux/iommu.h> #define for_each_iotlb_cr(obj, n, __i, cr) \ for (__i = 0; \ @@ -27,6 +28,23 @@ struct iotlb_entry { u32 endian, elsz, mixed; }; +/** + * struct omap_iommu_domain - omap iommu domain + * @pgtable: the page table + * @iommu_dev: an omap iommu device attached to this domain. only a single + * iommu device can be attached for now. + * @dev: Device using this domain. + * @lock: domain lock, should be taken when attaching/detaching + * @domain: generic domain handle used by iommu core code + */ +struct omap_iommu_domain { + u32 *pgtable; + struct omap_iommu *iommu_dev; + struct device *dev; + spinlock_t lock; + struct iommu_domain domain; +}; + struct omap_iommu { const char *name; void __iomem *regbase; @@ -50,6 +68,22 @@ struct omap_iommu { int has_bus_err_back; u32 id; + + struct iommu_device iommu; + struct iommu_group *group; +}; + +/** + * struct omap_iommu_arch_data - omap iommu private data + * @iommu_dev: handle of the iommu device + * + * This is an omap iommu private data object, which binds an iommu user + * to its iommu device. This object should be placed at the iommu user's + * dev_archdata so generic IOMMU API can be used without having to + * utilize omap-specific plumbing anymore. + */ +struct omap_iommu_arch_data { + struct omap_iommu *iommu_dev; }; struct cr_regs { diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 9afcbf79f0b0..4ba48a26b389 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -8,6 +8,7 @@ #include <linux/delay.h> #include <linux/device.h> #include <linux/dma-iommu.h> +#include <linux/dma-mapping.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -90,6 +91,7 @@ struct rk_iommu { void __iomem **bases; int num_mmu; int irq; + struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ }; @@ -1032,6 +1034,7 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group, static int rk_iommu_add_device(struct device *dev) { struct iommu_group *group; + struct rk_iommu *iommu; int ret; if (!rk_iommu_is_dev_iommu_master(dev)) @@ -1054,6 +1057,10 @@ static int rk_iommu_add_device(struct device *dev) if (ret) goto err_remove_device; + iommu = rk_iommu_from_dev(dev); + if (iommu) + iommu_device_link(&iommu->iommu, dev); + iommu_group_put(group); return 0; @@ -1067,9 +1074,15 @@ err_put_group: static void rk_iommu_remove_device(struct device *dev) { + struct rk_iommu *iommu; + if (!rk_iommu_is_dev_iommu_master(dev)) return; + iommu = rk_iommu_from_dev(dev); + if (iommu) + iommu_device_unlink(&iommu->iommu, dev); + iommu_group_remove_device(dev); } @@ -1117,7 +1130,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct rk_iommu *iommu; struct resource *res; int num_res = pdev->num_resources; - int i; + int err, i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -1150,11 +1163,25 @@ static int rk_iommu_probe(struct platform_device *pdev) return -ENXIO; } - return 0; + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); + if (err) + return err; + + iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops); + err = iommu_device_register(&iommu->iommu); + + return err; } static int rk_iommu_remove(struct platform_device *pdev) { + struct rk_iommu *iommu = platform_get_drvdata(pdev); + + if (iommu) { + iommu_device_sysfs_remove(&iommu->iommu); + iommu_device_unregister(&iommu->iommu); + } + return 0; } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 9305964250ac..eeb19f560a05 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -15,6 +15,7 @@ #include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/dma-mapping.h> #include <soc/tegra/ahb.h> #include <soc/tegra/mc.h> diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c index aa44e11decca..853d598937f6 100644 --- a/drivers/media/platform/mtk-vpu/mtk_vpu.c +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c @@ -23,6 +23,7 @@ #include <linux/of_reserved_mem.h> #include <linux/sched.h> #include <linux/sizes.h> +#include <linux/dma-mapping.h> #include "mtk_vpu.h" diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 084ecf4aa9a4..0d984a28a003 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1943,30 +1943,13 @@ static void isp_detach_iommu(struct isp_device *isp) { arm_iommu_release_mapping(isp->mapping); isp->mapping = NULL; - iommu_group_remove_device(isp->dev); } static int isp_attach_iommu(struct isp_device *isp) { struct dma_iommu_mapping *mapping; - struct iommu_group *group; int ret; - /* Create a device group and add the device to it. */ - group = iommu_group_alloc(); - if (IS_ERR(group)) { - dev_err(isp->dev, "failed to allocate IOMMU group\n"); - return PTR_ERR(group); - } - - ret = iommu_group_add_device(group, isp->dev); - iommu_group_put(group); - - if (ret < 0) { - dev_err(isp->dev, "failed to add device to IPMMU group\n"); - return ret; - } - /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 7e6f6638433b..2f2ae609c548 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -23,7 +23,6 @@ #include <linux/clk-provider.h> #include <linux/device.h> #include <linux/io.h> -#include <linux/iommu.h> #include <linux/platform_device.h> #include <linux/wait.h> diff --git a/drivers/of/device.c b/drivers/of/device.c index 6e2f9113b1b7..9416d052cb89 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -82,7 +82,7 @@ int of_device_add(struct platform_device *ofdev) * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events * to fix up DMA configuration. */ -void of_dma_configure(struct device *dev, struct device_node *np) +int of_dma_configure(struct device *dev, struct device_node *np) { u64 dma_addr, paddr, size; int ret; @@ -107,7 +107,7 @@ void of_dma_configure(struct device *dev, struct device_node *np) ret = of_dma_get_range(np, &dma_addr, &paddr, &size); if (ret < 0) { dma_addr = offset = 0; - size = dev->coherent_dma_mask + 1; + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); } else { offset = PFN_DOWN(paddr - dma_addr); @@ -123,7 +123,7 @@ void of_dma_configure(struct device *dev, struct device_node *np) if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); - return; + return -EINVAL; } dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset); } @@ -144,13 +144,30 @@ void of_dma_configure(struct device *dev, struct device_node *np) coherent ? " " : " not "); iommu = of_iommu_configure(dev, np); + if (IS_ERR(iommu)) + return PTR_ERR(iommu); + dev_dbg(dev, "device is%sbehind an iommu\n", iommu ? " " : " not "); arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent); + + return 0; } EXPORT_SYMBOL_GPL(of_dma_configure); +/** + * of_dma_deconfigure - Clean up DMA configuration + * @dev: Device for which to clean up DMA configuration + * + * Clean up all configuration performed by of_dma_configure_ops() and free all + * resources that have been allocated. + */ +void of_dma_deconfigure(struct device *dev) +{ + arch_teardown_dma_ops(dev); +} + int of_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 45b413e5a444..71fecc2debfc 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/of_address.h> #include <linux/of_device.h> +#include <linux/of_iommu.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/platform_device.h> @@ -158,11 +159,6 @@ struct platform_device *of_device_alloc(struct device_node *np, } EXPORT_SYMBOL(of_device_alloc); -static void of_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} - /** * of_platform_device_create_pdata - Alloc, initialize and register an of_device * @np: pointer to node to create device for @@ -191,11 +187,9 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; - of_dma_configure(&dev->dev, dev->dev.of_node); of_msi_configure(&dev->dev, dev->dev.of_node); if (of_device_add(dev) != 0) { - of_dma_deconfigure(&dev->dev); platform_device_put(dev); goto err_clear_flag; } @@ -253,7 +247,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node, dev_set_name(&dev->dev, "%s", bus_id); else of_device_make_bus_id(&dev->dev); - of_dma_configure(&dev->dev, dev->dev.of_node); /* Allow the HW Peripheral ID to be overridden */ prop = of_get_property(node, "arm,primecell-periphid", NULL); @@ -547,7 +540,6 @@ static int of_platform_device_destroy(struct device *dev, void *data) amba_device_unregister(to_amba_device(dev)); #endif - of_dma_deconfigure(dev); of_node_clear_flag(dev->of_node, OF_POPULATED); of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 01eb8038fceb..19c8950c6c38 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1914,33 +1914,6 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_set_msi_domain(&dev->dev, d); } -/** - * pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node or ACPI node of host bridge's parent (if any). - */ -static void pci_dma_configure(struct pci_dev *dev) -{ - struct device *bridge = pci_get_host_bridge_device(dev); - - if (IS_ENABLED(CONFIG_OF) && - bridge->parent && bridge->parent->of_node) { - of_dma_configure(&dev->dev, bridge->parent->of_node); - } else if (has_acpi_companion(bridge)) { - struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); - enum dev_dma_attr attr = acpi_get_dma_attr(adev); - - if (attr == DEV_DMA_NOT_SUPPORTED) - dev_warn(&dev->dev, "DMA not supported.\n"); - else - acpi_dma_configure(&dev->dev, attr); - } - - pci_put_host_bridge_device(bridge); -} - void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1954,7 +1927,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff); diff --git a/drivers/soc/fsl/qbman/qman_priv.h b/drivers/soc/fsl/qbman/qman_priv.h index 22725bdc6f15..5fe9faf6232e 100644 --- a/drivers/soc/fsl/qbman/qman_priv.h +++ b/drivers/soc/fsl/qbman/qman_priv.h @@ -33,6 +33,7 @@ #include "dpaa_sys.h" #include <soc/fsl/qman.h> +#include <linux/dma-mapping.h> #include <linux/iommu.h> #if defined(CONFIG_FSL_PAMU) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2fc678e08d8d..73b82ac0b56b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -577,7 +577,7 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); -void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); +int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); void acpi_dma_deconfigure(struct device *dev); struct acpi_device *acpi_find_child_device(struct acpi_device *parent, diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3558f4eb1a86..314a0b9219c6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -566,7 +566,6 @@ IRQCHIP_OF_MATCH_TABLE() \ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(clksrc) \ - ACPI_PROBE_TABLE(iort) \ EARLYCON_TABLE() #define INIT_TEXT \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0f9de30d725f..137e4a3d89c5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -770,8 +770,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline void acpi_dma_configure(struct device *dev, - enum dev_dma_attr attr) { } +static inline int acpi_dma_configure(struct device *dev, + enum dev_dma_attr attr) +{ + return 0; +} static inline void acpi_dma_deconfigure(struct device *dev) { } diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 26e25d85eb3e..3ff9acea8616 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -55,7 +55,4 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) { return NULL; } #endif -#define IORT_ACPI_DECLARE(name, table_id, fn) \ - ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn) - #endif /* __ACPI_IORT_H__ */ diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f12..4eac2670bfa1 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ #include <asm/errno.h> #ifdef CONFIG_IOMMU_DMA +#include <linux/dma-mapping.h> #include <linux/iommu.h> #include <linux/msi.h> @@ -71,6 +72,7 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); #else @@ -100,6 +102,10 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) { } +static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0977317c6835..4f3eecedca2d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -728,6 +728,18 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ +#ifdef CONFIG_HAS_DMA +int dma_configure(struct device *dev); +void dma_deconfigure(struct device *dev); +#else +static inline int dma_configure(struct device *dev) +{ + return 0; +} + +static inline void dma_deconfigure(struct device *dev) {} +#endif + /* * Managed DMA API */ diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 187c10299722..90884072fa73 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -39,6 +39,7 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c573a52ae440..485a5b48f038 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -30,6 +30,8 @@ #include <linux/mmu_notifier.h> #include <linux/list.h> #include <linux/iommu.h> +#include <linux/io-64-nonatomic-lo-hi.h> + #include <asm/cacheflush.h> #include <asm/iommu.h> @@ -72,24 +74,8 @@ #define OFFSET_STRIDE (9) -#ifdef CONFIG_64BIT #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) -#else -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} -#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2e4de0deee53..2cb54adc4a33 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -19,12 +19,12 @@ #ifndef __LINUX_IOMMU_H #define __LINUX_IOMMU_H +#include <linux/scatterlist.h> +#include <linux/device.h> +#include <linux/types.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/of.h> -#include <linux/types.h> -#include <linux/scatterlist.h> -#include <trace/events/iommu.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -32,10 +32,13 @@ #define IOMMU_NOEXEC (1 << 3) #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ /* - * This is to make the IOMMU API setup privileged - * mapppings accessible by the master only at higher - * privileged execution level and inaccessible at - * less privileged levels. + * Where the bus hardware includes a privilege level as part of its access type + * markings, and certain devices are capable of issuing transactions marked as + * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other + * given permission flags only apply to accesses at the higher privilege level, + * and that unprivileged transactions should have as little access as possible. + * This would usually imply the same permissions as kernel mappings on the CPU, + * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) @@ -336,46 +339,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); -/** - * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework - * @domain: the iommu domain where the fault has happened - * @dev: the device where the fault has happened - * @iova: the faulting address - * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) - * - * This function should be called by the low-level IOMMU implementations - * whenever IOMMU faults happen, to allow high-level users, that are - * interested in such events, to know about them. - * - * This event may be useful for several possible use cases: - * - mere logging of the event - * - dynamic TLB/PTE loading - * - if restarting of the faulting device is required - * - * Returns 0 on success and an appropriate error code otherwise (if dynamic - * PTE/TLB loading will one day be supported, implementations will be able - * to tell whether it succeeded or not according to this return value). - * - * Specifically, -ENOSYS is returned if a fault handler isn't installed - * (though fault handlers can also return -ENOSYS, in case they want to - * elicit the default behavior of the IOMMU drivers). - */ -static inline int report_iommu_fault(struct iommu_domain *domain, - struct device *dev, unsigned long iova, int flags) -{ - int ret = -ENOSYS; - - /* - * if upper layers showed interest and installed a fault handler, - * invoke it. - */ - if (domain->handler) - ret = domain->handler(domain, dev, iova, flags, - domain->handler_token); - trace_io_page_fault(dev, iova, flags); - return ret; -} +extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags); static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 169ea0bd8eb4..b4ad8b4f8506 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -54,7 +54,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -void of_dma_configure(struct device *dev, struct device_node *np); +int of_dma_configure(struct device *dev, struct device_node *np); +void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -102,7 +103,12 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } -static inline void of_dma_configure(struct device *dev, struct device_node *np) + +static inline int of_dma_configure(struct device *dev, struct device_node *np) +{ + return 0; +} +static inline void of_dma_deconfigure(struct device *dev) {} #endif /* CONFIG_OF */ diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 0496d171700a..e8b12dbf6170 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -12,28 +12,8 @@ #include <linux/platform_device.h> -#define MMU_REG_SIZE 256 - -/** - * struct iommu_arch_data - omap iommu private data - * @name: name of the iommu device - * @iommu_dev: handle of the iommu device - * - * This is an omap iommu private data object, which binds an iommu user - * to its iommu device. This object should be placed at the iommu user's - * dev_archdata so generic IOMMU API can be used without having to - * utilize omap-specific plumbing anymore. - */ -struct omap_iommu_arch_data { - const char *name; - struct omap_iommu *iommu_dev; -}; - struct iommu_platform_data { - const char *name; const char *reset_name; - int nr_tlb_entries; - int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); }; diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 2c7befb10f13..99254ed89212 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -11,7 +11,6 @@ #define _TRACE_IOMMU_H #include <linux/tracepoint.h> -#include <linux/pci.h> struct device; |