From 0d3642883b092ccfc0b044c6581ee2c1f32ab165 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 5 Sep 2017 17:56:17 -0500 Subject: iommu/omap: Change the attach detection logic The OMAP IOMMU driver allows only a single device (eg: a rproc device) to be attached per domain. The current attach detection logic relies on a check for an attached iommu for the respective client device. Change this logic to use the client device pointer instead in preparation for supporting multiple iommu devices to be bound to a single iommu domain, and thereby to a client device. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bd67e1b2c64e..81ef729994ce 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -805,7 +805,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) struct iommu_domain *domain = obj->domain; struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - if (!omap_domain->iommu_dev) + if (!omap_domain->dev) return IRQ_NONE; errs = iommu_report_fault(obj, &da); @@ -1118,8 +1118,8 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) spin_lock(&omap_domain->lock); - /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev) { + /* only a single client device can be attached to a domain */ + if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); ret = -EBUSY; goto out; @@ -1148,9 +1148,14 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, { struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + if (!omap_domain->dev) { + dev_err(dev, "domain has no attached device\n"); + return; + } + /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev != oiommu) { - dev_err(dev, "invalid iommu device\n"); + if (omap_domain->dev != dev) { + dev_err(dev, "invalid attached device\n"); return; } @@ -1219,7 +1224,7 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) * An iommu device is still attached * (currently, only one device can be attached) ? */ - if (omap_domain->iommu_dev) + if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); kfree(omap_domain->pgtable); -- cgit v1.2.3 From 9d5018deec86673ef8418546a3ac43e47dbff3b9 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Tue, 5 Sep 2017 17:56:18 -0500 Subject: iommu/omap: Add support to program multiple iommus A client user instantiates and attaches to an iommu_domain to program the OMAP IOMMU associated with the domain. The iommus programmed by a client user are bound with the iommu_domain through the user's device archdata. The OMAP IOMMU driver currently supports only one IOMMU per IOMMU domain per user. The OMAP IOMMU driver has been enhanced to support allowing multiple IOMMUs to be programmed by a single client user. This support is being added mainly to handle the DSP subsystems on the DRA7xx SoCs, which have two MMUs within the same subsystem. These MMUs provide translations for a processor core port and an internal EDMA port. This support allows both the MMUs to be programmed together, but with each one retaining it's own internal state objects. The internal EDMA block is managed by the software running on the DSPs, and this design provides on-par functionality with previous generation OMAP DSPs where the EDMA and the DSP core shared the same MMU. The multiple iommus are expected to be provided through a sentinel terminated array of omap_iommu_arch_data objects through the client user's device archdata. The OMAP driver core is enhanced to loop through the array of attached iommus and program them for all common operations. The sentinel-terminated logic is used so as to not change the omap_iommu_arch_data structure. NOTE: 1. The IOMMU group and IOMMU core registration is done only for the DSP processor core MMU even though both MMUs are represented by their own platform device and are probed individually. The IOMMU device linking uses this registered MMU device. The struct iommu_device for the second MMU is not used even though memory for it is allocated. 2. The OMAP IOMMU debugfs code still continues to operate on individual IOMMU objects. Signed-off-by: Suman Anna [t-kristo@ti.com: ported support to 4.13 based kernel] Signed-off-by: Tero Kristo Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 358 ++++++++++++++++++++++++++++++++++----------- drivers/iommu/omap-iommu.h | 30 ++-- 2 files changed, 285 insertions(+), 103 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 81ef729994ce..e135ab830ebf 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -2,6 +2,7 @@ * omap iommu: tlb and pagetable primitives * * Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/ * * Written by Hiroshi DOYU , * Paul Mundt and Toshihiro Kobayashi @@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) **/ void omap_iommu_save_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - p[i] = iommu_read_reg(obj, i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); @@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); **/ void omap_iommu_restore_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - iommu_write_reg(obj, p[i], i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); @@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static bool omap_iommu_can_register(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return true; + + /* + * restrict IOMMU core registration only for processor-port MDMA MMUs + * on DRA7 DSPs + */ + if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) || + (!strcmp(dev_name(&pdev->dev), "41501000.mmu"))) + return true; + + return false; +} + static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, struct omap_iommu *obj) { @@ -984,19 +1023,22 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); - obj->group = iommu_group_alloc(); - if (IS_ERR(obj->group)) - return PTR_ERR(obj->group); + if (omap_iommu_can_register(pdev)) { + obj->group = iommu_group_alloc(); + if (IS_ERR(obj->group)) + return PTR_ERR(obj->group); - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); - if (err) - goto out_group; + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, + obj->name); + if (err) + goto out_group; - iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); - err = iommu_device_register(&obj->iommu); - if (err) - goto out_sysfs; + err = iommu_device_register(&obj->iommu); + if (err) + goto out_sysfs; + } pm_runtime_irq_safe(obj->dev); pm_runtime_enable(obj->dev); @@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - iommu_group_put(obj->group); - obj->group = NULL; + if (obj->group) { + iommu_group_put(obj->group); + obj->group = NULL; - iommu_device_sysfs_remove(&obj->iommu); - iommu_device_unregister(&obj->iommu); + iommu_device_sysfs_remove(&obj->iommu); + iommu_device_unregister(&obj->iommu); + } omap_iommu_debugfs_remove(obj); @@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, phys_addr_t pa, size_t bytes, int prot) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; struct iotlb_entry e; int omap_pgsz; - u32 ret; + u32 ret = -EINVAL; + int i; omap_pgsz = bytes_to_iopgsz(bytes); if (omap_pgsz < 0) { @@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, iotlb_init_entry(&e, da, pa, omap_pgsz); - ret = omap_iopgtable_store_entry(oiommu, &e); - if (ret) - dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + ret = omap_iopgtable_store_entry(oiommu, &e); + if (ret) { + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", + ret); + break; + } + } + + if (ret) { + while (i--) { + iommu--; + oiommu = iommu->iommu_dev; + iopgtable_clear_entry(oiommu, da); + } + } return ret; } @@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, size_t size) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; + bool error = false; + size_t bytes = 0; + int i; dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); - return iopgtable_clear_entry(oiommu, da); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + bytes = iopgtable_clear_entry(oiommu, da); + if (!bytes) + error = true; + } + + /* + * simplify return - we are only checking if any of the iommus + * reported an error, but not if all of them are unmapping the + * same number of entries. This should not occur due to the + * mirror programming. + */ + return error ? 0 : bytes; +} + +static int omap_iommu_count(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + int count = 0; + + while (arch_data->iommu_dev) { + count++; + arch_data++; + } + + return count; +} + +/* caller should call cleanup if this function fails */ +static int omap_iommu_attach_init(struct device *dev, + struct omap_iommu_domain *odomain) +{ + struct omap_iommu_device *iommu; + int i; + + odomain->num_iommus = omap_iommu_count(dev); + if (!odomain->num_iommus) + return -EINVAL; + + odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), + GFP_ATOMIC); + if (!odomain->iommus) + return -ENOMEM; + + iommu = odomain->iommus; + for (i = 0; i < odomain->num_iommus; i++, iommu++) { + iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC); + if (!iommu->pgtable) + return -ENOMEM; + + /* + * should never fail, but please keep this around to ensure + * we keep the hardware happy + */ + if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable, + IOPGD_TABLE_SIZE))) + return -EINVAL; + } + + return 0; +} + +static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain) +{ + int i; + struct omap_iommu_device *iommu = odomain->iommus; + + for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++) + kfree(iommu->pgtable); + + kfree(odomain->iommus); + odomain->num_iommus = 0; + odomain->iommus = NULL; } static int @@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu_device *iommu; struct omap_iommu *oiommu; int ret = 0; + int i; if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); @@ -1125,19 +1266,42 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out; } - oiommu = arch_data->iommu_dev; - - /* get a handle to and enable the omap iommu */ - ret = omap_iommu_attach(oiommu, omap_domain->pgtable); + ret = omap_iommu_attach_init(dev, omap_domain); if (ret) { - dev_err(dev, "can't get omap iommu: %d\n", ret); - goto out; + dev_err(dev, "failed to allocate required iommu data %d\n", + ret); + goto init_fail; + } + + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) { + /* configure and enable the omap iommu */ + oiommu = arch_data->iommu_dev; + ret = omap_iommu_attach(oiommu, iommu->pgtable); + if (ret) { + dev_err(dev, "can't get omap iommu: %d\n", ret); + goto attach_fail; + } + + oiommu->domain = domain; + iommu->iommu_dev = oiommu; } - omap_domain->iommu_dev = oiommu; omap_domain->dev = dev; - oiommu->domain = domain; + goto out; + +attach_fail: + while (i--) { + iommu--; + arch_data--; + oiommu = iommu->iommu_dev; + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } +init_fail: + omap_iommu_detach_fini(omap_domain); out: spin_unlock(&omap_domain->lock); return ret; @@ -1146,7 +1310,10 @@ out: static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, struct device *dev) { - struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu; + int i; if (!omap_domain->dev) { dev_err(dev, "domain has no attached device\n"); @@ -1159,13 +1326,24 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, return; } - iopgtable_clear_entry_all(oiommu); + /* + * cleanup in the reverse order of attachment - this addresses + * any h/w dependencies between multiple instances, if any + */ + iommu += (omap_domain->num_iommus - 1); + arch_data += (omap_domain->num_iommus - 1); + for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) { + oiommu = iommu->iommu_dev; + iopgtable_clear_entry_all(oiommu); + + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } - omap_iommu_detach(oiommu); + omap_iommu_detach_fini(omap_domain); - omap_domain->iommu_dev = NULL; omap_domain->dev = NULL; - oiommu->domain = NULL; } static void omap_iommu_detach_dev(struct iommu_domain *domain, @@ -1187,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) - goto out; - - omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); - if (!omap_domain->pgtable) - goto fail_nomem; - - /* - * should never fail, but please keep this around to ensure - * we keep the hardware happy - */ - if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) - goto fail_align; + return NULL; spin_lock_init(&omap_domain->lock); @@ -1207,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain->domain.geometry.force_aperture = true; return &omap_domain->domain; - -fail_align: - kfree(omap_domain->pgtable); -fail_nomem: - kfree(omap_domain); -out: - return NULL; } static void omap_iommu_domain_free(struct iommu_domain *domain) @@ -1227,7 +1387,6 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); - kfree(omap_domain->pgtable); kfree(omap_domain); } @@ -1235,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t da) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu = iommu->iommu_dev; struct device *dev = oiommu->dev; u32 *pgd, *pte; phys_addr_t ret = 0; + /* + * all the iommus within the domain will have identical programming, + * so perform the lookup using just the first iommu + */ iopgtable_lookup_entry(oiommu, da, &pgd, &pte); if (pte) { @@ -1265,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, static int omap_iommu_add_device(struct device *dev) { - struct omap_iommu_arch_data *arch_data; + struct omap_iommu_arch_data *arch_data, *tmp; struct omap_iommu *oiommu; struct iommu_group *group; struct device_node *np; struct platform_device *pdev; + int num_iommus, i; int ret; /* @@ -1281,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev) if (!dev->of_node) return 0; - np = of_parse_phandle(dev->of_node, "iommus", 0); - if (!np) + /* + * retrieve the count of IOMMU nodes using phandle size as element size + * since #iommu-cells = 0 for OMAP + */ + num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", + sizeof(phandle)); + if (num_iommus < 0) return 0; - pdev = of_find_device_by_node(np); - if (WARN_ON(!pdev)) { - of_node_put(np); - return -EINVAL; - } + arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) + return -ENOMEM; - oiommu = platform_get_drvdata(pdev); - if (!oiommu) { - of_node_put(np); - return -EINVAL; - } + for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) { + np = of_parse_phandle(dev->of_node, "iommus", i); + if (!np) { + kfree(arch_data); + return -EINVAL; + } + + pdev = of_find_device_by_node(np); + if (WARN_ON(!pdev)) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + oiommu = platform_get_drvdata(pdev); + if (!oiommu) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + tmp->iommu_dev = oiommu; - arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); - if (!arch_data) { of_node_put(np); - return -ENOMEM; } + /* + * use the first IOMMU alone for the sysfs device linking. + * TODO: Evaluate if a single iommu_group needs to be + * maintained for both IOMMUs + */ + oiommu = arch_data->iommu_dev; ret = iommu_device_link(&oiommu->iommu, dev); if (ret) { kfree(arch_data); - of_node_put(np); return ret; } - arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; /* @@ -1326,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev) } iommu_group_put(group); - of_node_put(np); - return 0; } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a675af29a6ec..1703159ef5af 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -28,18 +28,27 @@ struct iotlb_entry { u32 endian, elsz, mixed; }; +/** + * struct omap_iommu_device - omap iommu device data + * @pgtable: page table used by an omap iommu attached to a domain + * @iommu_dev: pointer to store an omap iommu instance attached to a domain + */ +struct omap_iommu_device { + u32 *pgtable; + struct omap_iommu *iommu_dev; +}; + /** * struct omap_iommu_domain - omap iommu domain - * @pgtable: the page table - * @iommu_dev: an omap iommu device attached to this domain. only a single - * iommu device can be attached for now. + * @num_iommus: number of iommus in this domain + * @iommus: omap iommu device data for all iommus in this domain * @dev: Device using this domain. * @lock: domain lock, should be taken when attaching/detaching * @domain: generic domain handle used by iommu core code */ struct omap_iommu_domain { - u32 *pgtable; - struct omap_iommu *iommu_dev; + u32 num_iommus; + struct omap_iommu_device *iommus; struct device *dev; spinlock_t lock; struct iommu_domain domain; @@ -97,17 +106,6 @@ struct iotlb_lock { short vict; }; -/** - * dev_to_omap_iommu() - retrieves an omap iommu object from a user device - * @dev: iommu client device - */ -static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) -{ - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - - return arch_data->iommu_dev; -} - /* * MMU Register offsets */ -- cgit v1.2.3 From 7a974b29fe5d3704eafec707ba6390c3288c80fe Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 15 Sep 2017 13:05:08 +0200 Subject: iommu/exynos: Rework runtime PM links management add_device is a bit more suitable for establishing runtime PM links than the xlate callback. This change also makes it possible to implement proper cleanup - in remove_device callback. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index f596fcc32898..91c548d49b92 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -263,6 +263,7 @@ struct exynos_iommu_domain { struct sysmmu_drvdata { struct device *sysmmu; /* SYSMMU controller device */ struct device *master; /* master device (owner) */ + struct device_link *link; /* runtime PM link to master */ void __iomem *sfrbase; /* our registers */ struct clk *clk; /* SYSMMU's clock */ struct clk *aclk; /* SYSMMU's aclk clock */ @@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev) static int exynos_iommu_add_device(struct device *dev) { + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; struct iommu_group *group; if (!has_sysmmu(dev)) @@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev) if (IS_ERR(group)) return PTR_ERR(group); + list_for_each_entry(data, &owner->controllers, owner_node) { + /* + * SYSMMU will be runtime activated via device link + * (dependency) to its master device, so there are no + * direct calls to pm_runtime_get/put in this driver. + */ + data->link = device_link_add(dev, data->sysmmu, + DL_FLAG_PM_RUNTIME); + } iommu_group_put(group); return 0; @@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev) static void exynos_iommu_remove_device(struct device *dev) { struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; if (!has_sysmmu(dev)) return; @@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev) } } iommu_group_remove_device(dev); + + list_for_each_entry(data, &owner->controllers, owner_node) + device_link_del(data->link); } static int exynos_iommu_of_xlate(struct device *dev, @@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev, list_add_tail(&data->owner_node, &owner->controllers); data->master = dev; - /* - * SYSMMU will be runtime activated via device link (dependency) to its - * master device, so there are no direct calls to pm_runtime_get/put - * in this driver. - */ - device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME); - return 0; } -- cgit v1.2.3 From 2070f940a6d5148cf2df0d0087ff0a64d9f15237 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:42 +0100 Subject: iommu/iova: Optimise rbtree searching Checking the IOVA bounds separately before deciding which direction to continue the search (if necessary) results in redundantly comparing both pfns twice each. GCC can already determine that the final comparison op is redundant and optimise it down to 3 in total, but we can go one further with a little tweak of the ordering (which makes the intent of the code that much cleaner as a bonus). Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: rewrote commit message to clarify] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 33edfa794ae9..f129ff4f5c89 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -342,15 +342,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) while (node) { struct iova *iova = rb_entry(node, struct iova, node); - /* If pfn falls within iova's range, return iova */ - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - return iova; - } - if (pfn < iova->pfn_lo) node = node->rb_left; - else if (pfn > iova->pfn_lo) + else if (pfn > iova->pfn_hi) node = node->rb_right; + else + return iova; /* pfn falls within iova's range */ } return NULL; -- cgit v1.2.3 From 086c83acb70fc6da044c9ca45c1c9780c64545b0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:43 +0100 Subject: iommu/iova: Optimise the padding calculation The mask for calculating the padding size doesn't change, so there's no need to recalculate it every loop iteration. Furthermore, Once we've done that, it becomes clear that we don't actually need to calculate a padding size at all - by flipping the arithmetic around, we can just combine the upper limit, size, and mask directly to check against the lower limit. For an arm64 build, this alone knocks 20% off the object code size of the entire alloc_iova() function! Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: simplified more of the arithmetic, rewrote commit message] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f129ff4f5c89..20be9a8b3188 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -182,24 +182,17 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, rb_insert_color(&iova->node, root); } -/* - * Computes the padding size required, to make the start address - * naturally aligned on the power-of-two order of its size - */ -static unsigned int -iova_get_pad_size(unsigned int size, unsigned int limit_pfn) -{ - return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); -} - static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn; - unsigned int pad_size = 0; + unsigned long saved_pfn, new_pfn; + unsigned long align_mask = ~0UL; + + if (size_aligned) + align_mask <<= fls_long(size - 1); /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); @@ -209,31 +202,26 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, while (curr) { struct iova *curr_iova = rb_entry(curr, struct iova, node); - if (limit_pfn <= curr_iova->pfn_lo) { + if (limit_pfn <= curr_iova->pfn_lo) goto move_left; - } else if (limit_pfn > curr_iova->pfn_hi) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) - break; /* found a free slot */ - } + + if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) + break; /* found a free slot */ + limit_pfn = curr_iova->pfn_lo; move_left: prev = curr; curr = rb_prev(curr); } - if (!curr) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + new_pfn = (limit_pfn - size) & align_mask; + if (limit_pfn < size || new_pfn < iovad->start_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size); + new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ -- cgit v1.2.3 From e60aa7b53845a261dd419652f12ab9f89e668843 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:44 +0100 Subject: iommu/iova: Extend rbtree node caching The cached node mechanism provides a significant performance benefit for allocations using a 32-bit DMA mask, but in the case of non-PCI devices or where the 32-bit space is full, the loss of this benefit can be significant - on large systems there can be many thousands of entries in the tree, such that walking all the way down to find free space every time becomes increasingly awful. Maintain a similar cached node for the whole IOVA space as a superset of the 32-bit space so that performance can remain much more consistent. Inspired by work by Zhen Lei . Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 60 ++++++++++++++++++++++++---------------------------- include/linux/iova.h | 3 ++- 2 files changed, 30 insertions(+), 33 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 20be9a8b3188..c6f5a22f8d20 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -48,6 +48,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; + iovad->cached_node = NULL; iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; @@ -110,48 +111,44 @@ EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) { - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) + struct rb_node *cached_node = NULL; + struct iova *curr_iova; + + if (*limit_pfn <= iovad->dma_32bit_pfn) + cached_node = iovad->cached32_node; + if (!cached_node) + cached_node = iovad->cached_node; + if (!cached_node) return rb_last(&iovad->rbroot); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } + + curr_iova = rb_entry(cached_node, struct iova, node); + *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); + + return rb_prev(cached_node); } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; - if (!iovad->cached32_node) - return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); + cached_iova = rb_entry(iovad->cached32_node, struct iova, node); + if (free->pfn_hi < iovad->dma_32bit_pfn && + iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached32_node = rb_next(&free->node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); - - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; - } + cached_iova = rb_entry(iovad->cached_node, struct iova, node); + if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached_node = rb_next(&free->node); } /* Insert the iova into domain rbtree by holding writer lock */ @@ -188,7 +185,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, { struct rb_node *prev, *curr = NULL; unsigned long flags; - unsigned long saved_pfn, new_pfn; + unsigned long new_pfn; unsigned long align_mask = ~0UL; if (size_aligned) @@ -196,7 +193,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, &limit_pfn); prev = curr; while (curr) { @@ -226,7 +222,7 @@ move_left: /* If we have 'prev', it's a valid place to start the insertion. */ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); diff --git a/include/linux/iova.h b/include/linux/iova.h index d179b9bf7814..69ea3e258ff2 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -70,7 +70,8 @@ struct iova_fq { struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ + struct rb_node *cached_node; /* Save last alloced node */ + struct rb_node *cached32_node; /* Save last 32-bit alloced node */ unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; -- cgit v1.2.3 From aa3ac9469c1850ed00741955b975c3a19029763a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:45 +0100 Subject: iommu/iova: Make dma_32bit_pfn implicit Now that the cached node optimisation can apply to all allocations, the couple of users which were playing tricks with dma_32bit_pfn in order to benefit from it can stop doing so. Conversely, there is also no need for all the other users to explicitly calculate a 'real' 32-bit PFN, when init_iova_domain() can happily do that itself from the page granularity. CC: Thierry Reding CC: Jonathan Hunter CC: David Airlie CC: Sudeep Dutt CC: Ashutosh Dixit Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: use iova_shift(), rewrote commit message] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/gpu/drm/tegra/drm.c | 3 +-- drivers/gpu/host1x/dev.c | 3 +-- drivers/iommu/amd_iommu.c | 7 ++----- drivers/iommu/dma-iommu.c | 18 +----------------- drivers/iommu/intel-iommu.c | 11 +++-------- drivers/iommu/iova.c | 4 ++-- drivers/misc/mic/scif/scif_rma.c | 3 +-- include/linux/iova.h | 5 ++--- 8 files changed, 13 insertions(+), 41 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 597d563d636a..b822e484b7e5 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) order = __ffs(tegra->domain->pgsize_bitmap); init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order, - carveout_end >> order); + carveout_start >> order); tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.limit = carveout_end >> tegra->carveout.shift; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7f22c5c37660..5267c62e8896 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev) order = __ffs(host->domain->pgsize_bitmap); init_iova_domain(&host->iova, 1UL << order, - geometry->aperture_start >> order, - geometry->aperture_end >> order); + geometry->aperture_start >> order); host->iova_end = geometry->aperture_end; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..647ab7691aee 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -63,7 +63,6 @@ /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) @@ -1788,8 +1787,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; @@ -2696,8 +2694,7 @@ static int init_reserved_iova_ranges(void) struct pci_dev *pdev = NULL; struct iova *val; - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9d1cebe7f6cb..191be9c80a8a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* ...then finally give it a kicking to make sure it fits */ base_pfn = max_t(unsigned long, base_pfn, domain->geometry.aperture_start >> order); - end_pfn = min_t(unsigned long, end_pfn, - domain->geometry.aperture_end >> order); } - /* - * PCI devices may have larger DMA masks, but still prefer allocating - * within a 32-bit mask to avoid DAC addressing. Such limitations don't - * apply to the typical platform device, so for those we may as well - * leave the cache limit at the top of their range to save an rb_last() - * traversal on every allocation. - */ - if (dev && dev_is_pci(dev)) - end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */ if (iovad->start_pfn) { @@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, pr_warn("Incompatible range for DMA domain\n"); return -EFAULT; } - /* - * If we have devices with different DMA masks, move the free - * area cache limit down for the benefit of the smaller one. - */ - iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + init_iova_domain(iovad, 1UL << order, base_pfn); if (!dev) return 0; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6784a05dd6b2..ebb48353dd39 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -82,8 +82,6 @@ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) -#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) /* page table handling */ #define LEVEL_STRIDE (9) @@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, unsigned long sagaw; int err; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); err = init_iova_flush_queue(&domain->iovad, iommu_flush_iova, iova_entry_free); @@ -4897,8 +4893,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c6f5a22f8d20..65032e60a5d1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -37,7 +37,7 @@ static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit) + unsigned long start_pfn) { /* * IOVA granularity will normally be equal to the smallest @@ -52,7 +52,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->cached32_node = NULL; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); iovad->flush_cb = NULL; iovad->fq = NULL; init_iova_rcaches(iovad); diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 329727e00e97..c824329f7012 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep) struct scif_endpt_rma_info *rma = &ep->rma_info; mutex_init(&rma->rma_lock); - init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, - SCIF_DMA_64BIT_PFN); + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); spin_lock_init(&rma->tc_lock); mutex_init(&rma->mmn_lock); INIT_LIST_HEAD(&rma->reg_list); diff --git a/include/linux/iova.h b/include/linux/iova.h index 69ea3e258ff2..953cfd20f152 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -154,7 +154,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit); + unsigned long start_pfn); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -230,8 +230,7 @@ static inline void copy_reserved_iova(struct iova_domain *from, static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, - unsigned long pfn_32bit) + unsigned long start_pfn) { } -- cgit v1.2.3 From bb68b2fbfbd643d4407541f9c7a16a2c9b3a57c7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:46 +0100 Subject: iommu/iova: Add rbtree anchor node Add a permanent dummy IOVA reservation to the rbtree, such that we can always access the top of the address space instantly. The immediate benefit is that we remove the overhead of the rb_last() traversal when not using the cached node, but it also paves the way for further simplifications. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 15 +++++++++++++-- include/linux/iova.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 65032e60a5d1..9e04c1f3e740 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -24,6 +24,9 @@ #include #include +/* The anchor node sits above the top of the usable address space */ +#define IOVA_ANCHOR ~0UL + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); @@ -55,6 +58,9 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); iovad->flush_cb = NULL; iovad->fq = NULL; + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -119,7 +125,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) if (!cached_node) cached_node = iovad->cached_node; if (!cached_node) - return rb_last(&iovad->rbroot); + return rb_prev(&iovad->anchor.node); curr_iova = rb_entry(cached_node, struct iova, node); *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); @@ -242,7 +248,8 @@ EXPORT_SYMBOL(alloc_iova_mem); void free_iova_mem(struct iova *iova) { - kmem_cache_free(iova_cache, iova); + if (iova->pfn_lo != IOVA_ANCHOR) + kmem_cache_free(iova_cache, iova); } EXPORT_SYMBOL(free_iova_mem); @@ -676,6 +683,10 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; + /* Don't allow nonsensical pfns */ + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) + return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { diff --git a/include/linux/iova.h b/include/linux/iova.h index 953cfd20f152..c696ee81054e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,6 +75,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU -- cgit v1.2.3 From 973f5fbedb0721ab964386a5fe5120998e71580c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:47 +0100 Subject: iommu/iova: Simplify cached node logic The logic of __get_cached_rbnode() is a little obtuse, but then __get_prev_node_of_cached_rbnode_or_last_node_and_update_limit_pfn() wouldn't exactly roll off the tongue... Now that we have the invariant that there is always a valid node to start searching downwards from, everything gets a bit easier to follow if we simplify that function to do what it says on the tin and return the cached node (or anchor node as appropriate) directly. In turn, we can then deduplicate the rb_prev() and limit_pfn logic into the main loop itself, further reduce the amount of code under the lock, and generally make the inner workings a bit less subtle. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 51 +++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 9e04c1f3e740..7b7363518733 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -51,8 +51,8 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; - iovad->cached_node = NULL; - iovad->cached32_node = NULL; + iovad->cached_node = &iovad->anchor.node; + iovad->cached32_node = &iovad->anchor.node; iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); @@ -115,22 +115,12 @@ int init_iova_flush_queue(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) { - struct rb_node *cached_node = NULL; - struct iova *curr_iova; - - if (*limit_pfn <= iovad->dma_32bit_pfn) - cached_node = iovad->cached32_node; - if (!cached_node) - cached_node = iovad->cached_node; - if (!cached_node) - return rb_prev(&iovad->anchor.node); + if (limit_pfn <= iovad->dma_32bit_pfn) + return iovad->cached32_node; - curr_iova = rb_entry(cached_node, struct iova, node); - *limit_pfn = min(*limit_pfn, curr_iova->pfn_lo); - - return rb_prev(cached_node); + return iovad->cached_node; } static void @@ -149,11 +139,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free->pfn_hi < iovad->dma_32bit_pfn && - iovad->cached32_node && free->pfn_lo >= cached_iova->pfn_lo) + free->pfn_lo >= cached_iova->pfn_lo) iovad->cached32_node = rb_next(&free->node); cached_iova = rb_entry(iovad->cached_node, struct iova, node); - if (iovad->cached_node && free->pfn_lo >= cached_iova->pfn_lo) + if (free->pfn_lo >= cached_iova->pfn_lo) iovad->cached_node = rb_next(&free->node); } @@ -189,7 +179,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { - struct rb_node *prev, *curr = NULL; + struct rb_node *curr, *prev; + struct iova *curr_iova; unsigned long flags; unsigned long new_pfn; unsigned long align_mask = ~0UL; @@ -199,24 +190,16 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - curr = __get_cached_rbnode(iovad, &limit_pfn); - prev = curr; - while (curr) { - struct iova *curr_iova = rb_entry(curr, struct iova, node); - - if (limit_pfn <= curr_iova->pfn_lo) - goto move_left; - - if (((limit_pfn - size) & align_mask) > curr_iova->pfn_hi) - break; /* found a free slot */ - - limit_pfn = curr_iova->pfn_lo; -move_left: + curr = __get_cached_rbnode(iovad, limit_pfn); + curr_iova = rb_entry(curr, struct iova, node); + do { + limit_pfn = min(limit_pfn, curr_iova->pfn_lo); + new_pfn = (limit_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - } + curr_iova = rb_entry(curr, struct iova, node); + } while (curr && new_pfn <= curr_iova->pfn_hi); - new_pfn = (limit_pfn - size) & align_mask; if (limit_pfn < size || new_pfn < iovad->start_pfn) { spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; -- cgit v1.2.3 From 7595dc588a39c37091ddf65f6c0a3cd40f128e7a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 19 Sep 2017 14:48:39 +0100 Subject: iommu/iova: Simplify domain destruction All put_iova_domain() should have to worry about is freeing memory - by that point the domain must no longer be live, so the act of cleaning up doesn't need to be concurrency-safe or maintain the rbtree in a self-consistent state. There's no need to waste time with locking or emptying the rcache magazines, and we can just use the postorder traversal helper to clear out the remaining rbtree entries in-place. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 48 +++++++++--------------------------------------- 1 file changed, 9 insertions(+), 39 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 7b7363518733..ca21196c1f2d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -583,21 +583,12 @@ EXPORT_SYMBOL_GPL(queue_iova); */ void put_iova_domain(struct iova_domain *iovad) { - struct rb_node *node; - unsigned long flags; + struct iova *iova, *tmp; free_iova_flush_queue(iovad); free_iova_rcaches(iovad); - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = rb_first(&iovad->rbroot); - while (node) { - struct iova *iova = rb_entry(node, struct iova, node); - - rb_erase(node, &iovad->rbroot); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); - node = rb_first(&iovad->rbroot); - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(put_iova_domain); @@ -989,47 +980,26 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); } -/* - * Free a cpu's rcache. - */ -static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, - struct iova_rcache *rcache) -{ - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); - unsigned long flags; - - spin_lock_irqsave(&cpu_rcache->lock, flags); - - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); - iova_magazine_free(cpu_rcache->loaded); - - iova_magazine_free_pfns(cpu_rcache->prev, iovad); - iova_magazine_free(cpu_rcache->prev); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); -} - /* * free rcache data structures. */ static void free_iova_rcaches(struct iova_domain *iovad) { struct iova_rcache *rcache; - unsigned long flags; + struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; int i, j; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; - for_each_possible_cpu(cpu) - free_cpu_iova_rcache(cpu, iovad, rcache); - spin_lock_irqsave(&rcache->lock, flags); + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + iova_magazine_free(cpu_rcache->loaded); + iova_magazine_free(cpu_rcache->prev); + } free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); + for (j = 0; j < rcache->depot_size; ++j) iova_magazine_free(rcache->depot[j]); - } - spin_unlock_irqrestore(&rcache->lock, flags); } } -- cgit v1.2.3 From b826ee9a4f1cbf83cadc5a307de8eea27637699a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 19 Sep 2017 14:48:40 +0100 Subject: iommu/iova: Make rcache limit_pfn handling more robust When popping a pfn from an rcache, we are currently checking it directly against limit_pfn for viability. Since this represents iova->pfn_lo, it is technically possible for the corresponding iova->pfn_hi to be greater than limit_pfn. Although we generally get away with it in practice since limit_pfn is typically a power-of-two boundary and the IOVAs are size-aligned, it's pretty trivial to make the iova_rcache_get() path take the allocation size into account for complete safety. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ca21196c1f2d..15ff3033bbd7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -406,7 +406,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long iova_pfn; struct iova *new_iova; - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); if (iova_pfn) return iova_pfn; @@ -823,7 +823,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag, { BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] >= limit_pfn) + if (mag->pfns[mag->size - 1] > limit_pfn) return 0; return mag->pfns[--mag->size]; @@ -977,7 +977,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); } /* -- cgit v1.2.3 From e8b198402745ed413ed8229b2eb45d34016eb5d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 11:31:23 +0100 Subject: iommu/iova: Try harder to allocate from rcache magazine When devices with different DMA masks are using the same domain, or for PCI devices where we usually try a speculative 32-bit allocation first, there is a fair possibility that the top PFN of the rcache stack at any given time may be unsuitable for the lower limit, prompting a fallback to allocating anew from the rbtree. Consequently, we may end up artifically increasing pressure on the 32-bit IOVA space as unused IOVAs accumulate lower down in the rcache stacks, while callers with 32-bit masks also impose unnecessary rbtree overhead. In such cases, let's try a bit harder to satisfy the allocation locally first - scanning the whole stack should still be relatively inexpensive. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 15ff3033bbd7..b0ca23682008 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -821,12 +821,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag) static unsigned long iova_magazine_pop(struct iova_magazine *mag, unsigned long limit_pfn) { + int i; + unsigned long pfn; + BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] > limit_pfn) - return 0; + /* Only fall back to the rbtree if we have no suitable pfns at all */ + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) + if (i == 0) + return 0; + + /* Swap it to pop it */ + pfn = mag->pfns[i]; + mag->pfns[i] = mag->pfns[--mag->size]; - return mag->pfns[--mag->size]; + return pfn; } static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) -- cgit v1.2.3 From abbb8a09384f69f7bb05936879e51933c146afba Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 2 Oct 2017 11:53:31 +0100 Subject: iommu/iova: Don't try to copy anchor nodes Anchor nodes are not reserved IOVAs in the way that copy_reserved_iova() cares about - while the failure from reserve_iova() is benign since the target domain will already have its own anchor, we still don't want to be triggering spurious warnings. Reported-by: kernel test robot Signed-off-by: Robin Murphy Fixes: bb68b2fbfbd6 ('iommu/iova: Add rbtree anchor node') Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b0ca23682008..3aee64b99df1 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -704,6 +704,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) struct iova *iova = rb_entry(node, struct iova, node); struct iova *new_iova; + if (iova->pfn_lo == IOVA_ANCHOR) + continue; + new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); if (!new_iova) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", -- cgit v1.2.3 From 32b124492bdf974f68eaef1bde80dc8058aef002 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 15:55:01 +0100 Subject: iommu/io-pgtable-arm: Convert to IOMMU API TLB sync Now that the core API issues its own post-unmap TLB sync call, push that operation out from the io-pgtable-arm internals into the users. For now, we leave the invalidation implicit in the unmap operation, since none of the current users would benefit much from any change to that. CC: Magnus Damm CC: Laurent Pinchart Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 10 ++++++++++ drivers/iommu/arm-smmu.c | 20 +++++++++++++++----- drivers/iommu/io-pgtable-arm.c | 7 +------ drivers/iommu/ipmmu-vmsa.c | 10 ++++++++++ 4 files changed, 36 insertions(+), 11 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e67ba6c40faf..ee0c7b73cff7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1743,6 +1743,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + + if (smmu) + __arm_smmu_tlb_sync(smmu); +} + static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -1963,6 +1971,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3bdb799d3b4b..e4a82d70d446 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -250,6 +250,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ @@ -735,7 +736,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - const struct iommu_gather_ops *tlb_ops; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -813,7 +813,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } - tlb_ops = &arm_smmu_s1_tlb_ops; + smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -833,9 +833,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, oas = min(oas, 40UL); } if (smmu->version == ARM_SMMU_V2) - tlb_ops = &arm_smmu_s2_tlb_ops_v2; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2; else - tlb_ops = &arm_smmu_s2_tlb_ops_v1; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; @@ -863,7 +863,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, - .tlb = tlb_ops, + .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; @@ -1259,6 +1259,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_sync(smmu_domain); +} + static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, dma_addr_t iova) { @@ -1562,6 +1570,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e8018a308868..51e5c43caed1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { - size_t unmapped; struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte *ptep = data->pgd; int lvl = ARM_LPAE_START_LVL(data); @@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; - unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_lpae_unmap(data, iova, size, lvl, ptep); } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 195d6e93ac71..af8140054273 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -619,6 +619,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, return domain->iop->unmap(domain->iop, iova, size); } +static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + if (domain->mmu) + ipmmu_tlb_flush_all(domain); +} + static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, dma_addr_t iova) { @@ -876,6 +884,8 @@ static const struct iommu_ops ipmmu_ops = { .detach_dev = ipmmu_detach_device, .map = ipmmu_map, .unmap = ipmmu_unmap, + .flush_iotlb_all = ipmmu_iotlb_sync, + .iotlb_sync = ipmmu_iotlb_sync, .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device_dma, -- cgit v1.2.3 From 4d689b619445894f6b6fcbc496f6d302bd9e44a5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Sep 2017 15:55:02 +0100 Subject: iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync Now that the core API issues its own post-unmap TLB sync call, push that operation out from the io-pgtable-arm-v7s internals into the users. For now, we leave the invalidation implicit in the unmap operation, since none of the current users would benefit much from any change to that. Note that the conversion of msm_iommu is implicit, since that apparently has no specific TLB sync operation anyway. CC: Yong Wu CC: Rob Clark Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 7 +------ drivers/iommu/mtk_iommu.c | 7 +++++++ drivers/iommu/qcom_iommu.c | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index d665d0dc16e8..397531da8d9c 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - size_t unmapped; if (WARN_ON(upper_32_bits(iova))) return 0; - unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_v7s_unmap(data, iova, size, 1, data->pgd); } static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index bd515be5b380..d0c8dfbbd74d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -391,6 +391,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, return unmapsz; } +static void mtk_iommu_iotlb_sync(struct iommu_domain *domain) +{ + mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); +} + static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -490,6 +495,8 @@ static struct iommu_ops mtk_iommu_ops = { .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = mtk_iommu_iotlb_sync, + .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, .add_device = mtk_iommu_add_device, .remove_device = mtk_iommu_remove_device, diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0..4a2c4378b3db 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -443,6 +443,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return ret; } +static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops, + struct io_pgtable, ops); + if (!qcom_domain->pgtbl_ops) + return; + + pm_runtime_get_sync(qcom_domain->iommu->dev); + qcom_iommu_tlb_sync(pgtable->cookie); + pm_runtime_put_sync(qcom_domain->iommu->dev); +} + static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -570,6 +583,8 @@ static const struct iommu_ops qcom_iommu_ops = { .map = qcom_iommu_map, .unmap = qcom_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = qcom_iommu_iotlb_sync, + .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, .add_device = qcom_iommu_add_device, .remove_device = qcom_iommu_remove_device, -- cgit v1.2.3 From ec154bf56b276a0bb36079a5d22a267b5f417801 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 15:00:53 +0200 Subject: iommu/vt-d: Don't register bus-notifier under dmar_global_lock The notifier function will take the dmar_global_lock too, so lockdep complains about inverse locking order when the notifier is registered under the dmar_global_lock. Reported-by: Jan Kiszka Fixes: 59ce0515cdaf ('iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens') Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 7 +++++-- drivers/iommu/intel-iommu.c | 10 ++++++++++ include/linux/dmar.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 57c920c1372d..1ea7cd537873 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void) dmar_free_pci_notify_info(info); } } - - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); } return dmar_dev_scope_status; } +void dmar_register_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); +} + int __init dmar_table_init(void) { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6784a05dd6b2..934cef924461 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4752,6 +4752,16 @@ int __init intel_iommu_init(void) goto out_free_dmar; } + up_write(&dmar_global_lock); + + /* + * The bus notifier takes the dmar_global_lock, so lockdep will + * complain later when we register it under the lock. + */ + dmar_register_bus_notifier(); + + down_write(&dmar_global_lock); + if (no_iommu || dmar_disabled) { /* * We exit the function here to ensure IOMMU's remapping and diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e8ffba1052d3..e2433bc50210 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); +extern void dmar_register_bus_notifier(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); -- cgit v1.2.3 From b117e0380513c186065f247a9af09dc0cd3e703d Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 8 Oct 2017 23:33:31 +0100 Subject: iommu/vt-d: Delete unnecessary check in domain_context_mapping_one() Variable did_old is unsigned so checking whether it is greater or equal to zero is not necessary. Signed-off-by: Christos Gkekas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 934cef924461..1dab9f73a20b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2058,7 +2058,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { + if (did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, -- cgit v1.2.3 From 37946d95fc1a41ed79efb613b0818c2cdecbb2fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 12:16:39 +0200 Subject: iommu/amd: Add align parameter to alloc_irq_index() For multi-MSI IRQ ranges the IRQ index needs to be aligned to the power-of-two of the requested IRQ count. Extend the alloc_irq_index() function to allow such an allocation. Reported-by: Thomas Gleixner Fixes: 2b324506341cb ('iommu/amd: Add routines to manage irq remapping tables') Reviewed-by: Thomas Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..2d4ee2555a0d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3660,11 +3660,11 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count) +static int alloc_irq_index(u16 devid, int count, bool align) { struct irq_remap_table *table; + int index, c, alignment = 1; unsigned long flags; - int index, c; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -3674,16 +3674,22 @@ static int alloc_irq_index(u16 devid, int count) if (!table) return -ENODEV; + if (align) + alignment = roundup_pow_of_two(count); + spin_lock_irqsave(&table->lock, flags); /* Scan table for free entries */ - for (c = 0, index = table->min_index; + for (index = ALIGN(table->min_index, alignment), c = 0; index < MAX_IRQS_PER_TABLE; - ++index) { - if (!iommu->irte_ops->is_allocated(table, index)) + index++) { + if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; - else - c = 0; + } else { + c = 0; + index = ALIGN(index, alignment); + continue; + } if (c == count) { for (; c != 0; --c) @@ -4096,7 +4102,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, else ret = -ENOMEM; } else { - index = alloc_irq_index(devid, nr_irqs); + index = alloc_irq_index(devid, nr_irqs, false); } if (index < 0) { pr_warn("Failed to allocate IRTE\n"); -- cgit v1.2.3 From 53b9ec3fbb7da97d13951debbd42e3a0c4a7c9f7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 12:22:06 +0200 Subject: iommu/amd: Enforce alignment for MSI IRQs Make use of the new alignment capability of alloc_irq_index() to enforce IRQ index alignment for MSI. Reported-by: Thomas Gleixner Fixes: 2b324506341cb ('iommu/amd: Add routines to manage irq remapping tables') Reviewed-by: Thomas Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2d4ee2555a0d..cb7c531542da 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4102,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, else ret = -ENOMEM; } else { - index = alloc_irq_index(devid, nr_irqs, false); + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); + + index = alloc_irq_index(devid, nr_irqs, align); } if (index < 0) { pr_warn("Failed to allocate IRTE\n"); -- cgit v1.2.3 From 538d5b333216c3daa7a5821307164f10af73ec8c Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Wed, 20 Sep 2017 10:52:02 +0200 Subject: iommu/iova: Make rcache flush optional on IOVA allocation failure Since IOVA allocation failure is not unusual case we need to flush CPUs' rcache in hope we will succeed in next round. However, it is useful to decide whether we need rcache flush step because of two reasons: - Not scalability. On large system with ~100 CPUs iterating and flushing rcache for each CPU becomes serious bottleneck so we may want to defer it. - free_cpu_cached_iovas() does not care about max PFN we are interested in. Thus we may flush our rcaches and still get no new IOVA like in the commonly used scenario: if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); if (!iova) iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address 2. alloc_iova() fails due to full 32-bit space 3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again 4. Next alloc_iova_fast() call cannot take advantage of rcache since we have just defeated caches. In this case we pick the slowest option to proceed. This patch reworks flushed_rcache local flag to be additional function argument instead and control rcache flush step. Also, it updates all users to do the flush as the last chance. Signed-off-by: Tomasz Nowicki Reviewed-by: Robin Murphy Tested-by: Nate Watterson Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 +++-- drivers/iommu/dma-iommu.c | 6 ++++-- drivers/iommu/intel-iommu.c | 5 +++-- drivers/iommu/iova.c | 11 ++++++----- include/linux/iova.h | 5 +++-- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 647ab7691aee..3d64c844d8b1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1546,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev, if (dma_mask > DMA_BIT_MASK(32)) pfn = alloc_iova_fast(&dma_dom->iovad, pages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (!pfn) - pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(dma_mask), true); return (pfn << PAGE_SHIFT); } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 191be9c80a8a..25914d36c5ac 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -370,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); + iova = alloc_iova_fast(iovad, iova_len, + DMA_BIT_MASK(32) >> shift, false); if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, + true); return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ebb48353dd39..b3914fce8254 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3469,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev, * from higher range */ iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (iova_pfn) return iova_pfn; } - iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, + IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { pr_err("Allocating %ld-page iova for %s failed", nrpages, dev_name(dev)); diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 3aee64b99df1..84bda3a4dafc 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -395,14 +395,15 @@ EXPORT_SYMBOL_GPL(free_iova); * @iovad: - iova domain in question * @size: - size of page frames to allocate * @limit_pfn: - max limit address + * @flush_rcache: - set to flush rcache on regular allocation failure * This function tries to satisfy an iova allocation from the rcache, - * and falls back to regular allocation on failure. + * and falls back to regular allocation on failure. If regular allocation + * fails too and the flush_rcache flag is set then the rcache will be flushed. */ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, bool flush_rcache) { - bool flushed_rcache = false; unsigned long iova_pfn; struct iova *new_iova; @@ -415,11 +416,11 @@ retry: if (!new_iova) { unsigned int cpu; - if (flushed_rcache) + if (!flush_rcache) return 0; /* Try replenishing IOVAs by flushing rcache. */ - flushed_rcache = true; + flush_rcache = false; for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); goto retry; diff --git a/include/linux/iova.h b/include/linux/iova.h index c696ee81054e..928442dda565 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -150,7 +150,7 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn); + unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -212,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad, static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, + bool flush_rcache) { return 0; } -- cgit v1.2.3