From bb6bfd79d9bc69f0808a4156ec3ca9fb78694039 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 27 May 2021 14:37:09 -0500 Subject: iommu: Remove unused of_get_dma_window() of_get_dma_window() was added in 2012 and removed in 2014 in commit 891846516317 ("memory: Add NVIDIA Tegra memory controller support"). Remove it and simplify the header to use forward declarations for structs rather than includes. Cc: Joerg Roedel Cc: Will Deacon Cc: Frank Rowand Cc: iommu@lists.linux-foundation.org Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210527193710.1281746-1-robh@kernel.org Signed-off-by: Joerg Roedel --- include/linux/of_iommu.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 16f4b3e87f20..55c1eb300a86 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -2,29 +2,18 @@ #ifndef __OF_IOMMU_H #define __OF_IOMMU_H -#include -#include -#include +struct device; +struct device_node; +struct iommu_ops; #ifdef CONFIG_OF_IOMMU -extern int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size); - extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); #else -static inline int of_get_dma_window(struct device_node *dn, const char *prefix, - int index, unsigned long *busno, dma_addr_t *addr, - size_t *size) -{ - return -EINVAL; -} - static inline const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) -- cgit v1.2.3 From 40483774141625b9685b177fb6e1f36de48d33f8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:59 +0800 Subject: iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers Align the pasid alloc/free code with the generic helpers defined in the iommu core. 
This also refactored the SVA binding code to improve the readability. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 3 + drivers/iommu/intel/svm.c | 278 ++++++++++++++++++-------------------------- include/linux/intel-iommu.h | 1 - 4 files changed, 120 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 7e5b240b801d..a37bd54c5b90 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -42,6 +42,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b0ba187cb7f8..0ca7f8a2f38e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5411,6 +5411,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 82b0627ad7e7..da4310686ed3 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,9 +23,11 @@ #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 @@ -222,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static 
DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -477,79 +478,80 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; - struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + ioasid_t max_pasid = dev_is_pci(dev) ? + pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - if (!iommu || dmar_disabled) - return -EINVAL; + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; + svm->pasid = 
mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } } - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -557,96 +559,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto sdev_err; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; - - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, NULL); - if (svm->pasid == INVALID_IOASID) { - ret = -ENOSPC; - goto svm_err; - } - - ret = pasid_private_add(svm->pasid, svm); - if 
(ret) - goto pasid_err; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) - goto priv_err; - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); -priv_err: - pasid_private_remove(svm->pasid); -pasid_err: - ioasid_put(svm->pasid); -svm_err: - kfree(svm); -sdev_err: - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? 
- PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) - goto sdev_err; - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -655,6 +598,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -664,6 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -682,13 +627,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. 
@@ -1073,31 +1017,42 @@ prq_advance: return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1105,10 +1060,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 
03faf20a6817..4e8bb186daa7 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -791,7 +791,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -- cgit v1.2.3 From 4c82b88696ac57810ab923b3c5b0734646b9b69f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:02 +0800 Subject: iommu/vt-d: Allocate/register iopf queue for sva devices This allocates and registers the iopf queue infrastructure for devices which want to support IO page fault for SVA. Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-11-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 66 ++++++++++++++++++++++++++++++++------------- drivers/iommu/intel/svm.c | 37 ++++++++++++++++++++----- include/linux/intel-iommu.h | 2 ++ 3 files changed, 79 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 0ca7f8a2f38e..e78773d46d7d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -46,6 +46,7 @@ #include #include "../irq_remapping.h" +#include "../iommu-sva-lib.h" #include "pasid.h" #include "cap_audit.h" @@ -5338,6 +5339,34 @@ static int intel_iommu_disable_auxd(struct device *dev) return 0; } +static int intel_iommu_enable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu = info ? info->iommu : NULL; /* info is NULL-checked below; do not dereference it unguarded */ + + if (!info || !iommu || dmar_disabled) + return -EINVAL; + + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) + return -ENODEV; + + if (intel_iommu_enable_pasid(iommu, dev)) + return -ENODEV; + + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + + return
iopf_queue_add_device(iommu->iopf_queue, dev); +} + +static int intel_iommu_disable_sva(struct device *dev) +{ + struct device_domain_info *info = get_domain_info(dev); + struct intel_iommu *iommu = info->iommu; + + return iopf_queue_remove_device(iommu->iopf_queue, dev); +} + /* * A PCI express designated vendor specific extended capability is defined * in the section 3.7 of Intel scalable I/O virtualization technical spec @@ -5399,38 +5428,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat) static int intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_enable_auxd(dev); - if (feat == IOMMU_DEV_FEAT_IOPF) + case IOMMU_DEV_FEAT_IOPF: return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV; - if (feat == IOMMU_DEV_FEAT_SVA) { - struct device_domain_info *info = get_domain_info(dev); - - if (!info) - return -EINVAL; - - if (intel_iommu_enable_pasid(info->iommu, dev)) - return -ENODEV; + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_enable_sva(dev); - if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) - return -EINVAL; - - if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) - return 0; + default: + return -ENODEV; } - - return -ENODEV; } static int intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { - if (feat == IOMMU_DEV_FEAT_AUX) + switch (feat) { + case IOMMU_DEV_FEAT_AUX: return intel_iommu_disable_auxd(dev); - return -ENODEV; + case IOMMU_DEV_FEAT_IOPF: + return 0; + + case IOMMU_DEV_FEAT_SVA: + return intel_iommu_disable_sva(dev); + + default: + return -ENODEV; + } } static bool diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index d51ddece4259..4dc3ab36e9ae 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -84,6 +84,7 @@ svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) int intel_svm_enable_prq(struct 
intel_iommu *iommu) { + struct iopf_queue *iopfq; struct page *pages; int irq, ret; @@ -100,13 +101,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", iommu->name); ret = -EINVAL; - err: - free_pages((unsigned long)iommu->prq, PRQ_ORDER); - iommu->prq = NULL; - return ret; + goto free_prq; } iommu->pr_irq = irq; + snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), + "dmar%d-iopfq", iommu->seq_id); + iopfq = iopf_queue_alloc(iommu->iopfq_name); + if (!iopfq) { + pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); + ret = -ENOMEM; + goto free_hwirq; + } + iommu->iopf_queue = iopfq; + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, @@ -114,9 +122,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) if (ret) { pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", iommu->name); - dmar_free_hwirq(irq); - iommu->pr_irq = 0; - goto err; + goto free_iopfq; } dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); @@ -125,6 +131,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) init_completion(&iommu->prq_complete); return 0; + +free_iopfq: + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; +free_hwirq: + dmar_free_hwirq(irq); + iommu->pr_irq = 0; +free_prq: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return ret; } int intel_svm_finish_prq(struct intel_iommu *iommu) @@ -139,6 +157,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->pr_irq = 0; } + if (iommu->iopf_queue) { + iopf_queue_free(iommu->iopf_queue); + iommu->iopf_queue = NULL; + } + free_pages((unsigned long)iommu->prq, PRQ_ORDER); iommu->prq = NULL; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4e8bb186daa7..222520d149c1 100644 --- a/include/linux/intel-iommu.h 
+++ b/include/linux/intel-iommu.h @@ -606,6 +606,8 @@ struct intel_iommu { struct completion prq_complete; struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */ #endif + struct iopf_queue *iopf_queue; + unsigned char iopfq_name[16]; struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ -- cgit v1.2.3 From e93a67f5a0eef3e9ab5b4649cac5c3b831c6a9db Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:04 +0800 Subject: iommu/vt-d: Add prq_report trace event This adds a new trace event to track the page fault request report. This event will provide almost all information defined in a page request descriptor. A sample output: | prq_report: dmar0/0000:00:0a.0 seq# 1: rid=0x50 addr=0x559ef6f97 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 2: rid=0x50 addr=0x559ef6f9c rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 3: rid=0x50 addr=0x559ef6f98 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 4: rid=0x50 addr=0x559ef6f9d rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 5: rid=0x50 addr=0x559ef6f99 r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 6: rid=0x50 addr=0x559ef6f9e rw--l pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 7: rid=0x50 addr=0x559ef6f9a r---- pasid=0x2 index=0x1 | prq_report: dmar0/0000:00:0a.0 seq# 8: rid=0x50 addr=0x559ef6f9f rw--l pasid=0x2 index=0x1 This will be helpful for I/O page fault related debugging. 
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-13-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 7 +++++++ include/linux/intel-iommu.h | 29 +++++++++++++++++++++++++++++ include/trace/events/intel_iommu.h | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'include') diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index ade157b64ce7..d3d028c6a727 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "pasid.h" #include "../iommu-sva-lib.h" @@ -976,12 +977,18 @@ bad_req: goto bad_req; } + sdev->prq_seq_number++; + /* * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ if (intel_svm_prq_report(sdev->dev, req)) handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + + trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + sdev->prq_seq_number); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 222520d149c1..98b04fa9373e 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -778,6 +778,7 @@ struct intel_svm_dev { struct device *dev; struct intel_iommu *iommu; struct iommu_sva sva; + unsigned long prq_seq_number; u32 pasid; int users; u16 did; @@ -828,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) #define intel_iommu_enabled (0) #endif +static inline const char *decode_prq_descriptor(char *str, size_t size, + u64 dw0, u64 dw1, u64 dw2, u64 dw3) +{ + char *buf = str; + int bytes; + + bytes = snprintf(buf, size, + "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx", + FIELD_GET(GENMASK_ULL(31, 16), dw0), + 
FIELD_GET(GENMASK_ULL(63, 12), dw1), + dw1 & BIT_ULL(0) ? 'r' : '-', + dw1 & BIT_ULL(1) ? 'w' : '-', + dw0 & BIT_ULL(52) ? 'x' : '-', + dw0 & BIT_ULL(53) ? 'p' : '-', + dw1 & BIT_ULL(2) ? 'l' : '-', + FIELD_GET(GENMASK_ULL(51, 32), dw0), + FIELD_GET(GENMASK_ULL(11, 3), dw1)); + + /* Private Data */ + if (dw0 & BIT_ULL(9)) { + size -= bytes; + buf += bytes; + snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3); + } + + return str; +} + #endif diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h index d233f2916584..e5c1ca6d16ee 100644 --- a/include/trace/events/intel_iommu.h +++ b/include/trace/events/intel_iommu.h @@ -15,6 +15,8 @@ #include #include +#define MSG_MAX 256 + TRACE_EVENT(qi_submit, TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3), @@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit, __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3 ) ); + +TRACE_EVENT(prq_report, + TP_PROTO(struct intel_iommu *iommu, struct device *dev, + u64 dw0, u64 dw1, u64 dw2, u64 dw3, + unsigned long seq), + + TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq), + + TP_STRUCT__entry( + __field(u64, dw0) + __field(u64, dw1) + __field(u64, dw2) + __field(u64, dw3) + __field(unsigned long, seq) + __string(iommu, iommu->name) + __string(dev, dev_name(dev)) + __dynamic_array(char, buff, MSG_MAX) + ), + + TP_fast_assign( + __entry->dw0 = dw0; + __entry->dw1 = dw1; + __entry->dw2 = dw2; + __entry->dw3 = dw3; + __entry->seq = seq; + __assign_str(iommu, iommu->name); + __assign_str(dev, dev_name(dev)); + ), + + TP_printk("%s/%s seq# %ld: %s", + __get_str(iommu), __get_str(dev), __entry->seq, + decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0, + __entry->dw1, __entry->dw2, __entry->dw3) + ) +); #endif /* _TRACE_INTEL_IOMMU_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 55ee5e67a59a1b6f388d7a1c7b24022145f47a3e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:01:05 +0800 Subject: iommu/vt-d: Add 
common code for dmar latency performance monitors The execution time of some operations is very performance critical, such as cache invalidation and PRQ processing time. This adds some common code to monitor the execution time range of those operations. The interfaces include enabling/disabling, checking status, updating sampling data and providing a common string format for users. Signed-off-by: Fenghua Yu Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/Kconfig | 3 + drivers/iommu/intel/Makefile | 1 + drivers/iommu/intel/perf.c | 166 +++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/intel/perf.h | 73 +++++++++++++++++++ include/linux/intel-iommu.h | 1 + 5 files changed, 244 insertions(+) create mode 100644 drivers/iommu/intel/perf.c create mode 100644 drivers/iommu/intel/perf.h (limited to 'include') diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index a37bd54c5b90..59be5447b775 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -3,6 +3,9 @@ config DMAR_TABLE bool +config DMAR_PERF + bool + config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64) diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index ae236ec7d219..fa0dae16441c 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o +obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c new file mode 100644 index 000000000000..faaa96dda437 --- 
/dev/null +++ b/drivers/iommu/intel/perf.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * perf.c - performance monitor + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu + * Fenghua Yu + */ + +#include +#include + +#include "perf.h" + +static DEFINE_SPINLOCK(latency_lock); + +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + + return lstat && lstat[type].enabled; +} + +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat; + unsigned long flags; + int ret = -EBUSY; + + if (dmar_latency_enabled(iommu, type)) + return 0; + + spin_lock_irqsave(&latency_lock, flags); + if (!iommu->perf_statistic) { + iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM, + GFP_ATOMIC); + if (!iommu->perf_statistic) { + ret = -ENOMEM; + goto unlock_out; + } + } + + lstat = iommu->perf_statistic; + + if (!lstat[type].enabled) { + lstat[type].enabled = true; + lstat[type].counter[COUNTS_MIN] = UINT_MAX; + ret = 0; + } +unlock_out: + spin_unlock_irqrestore(&latency_lock, flags); + + return ret; +} + +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + + if (!dmar_latency_enabled(iommu, type)) + return; + + spin_lock_irqsave(&latency_lock, flags); + memset(&lstat[type], 0, sizeof(*lstat)); /* clear only this type's slot; the array holds DMAR_LATENCY_NUM slots in total */ + spin_unlock_irqrestore(&latency_lock, flags); +} + +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + u64 min, max; + + if (!dmar_latency_enabled(iommu, type)) + return; + + spin_lock_irqsave(&latency_lock, flags); + if (latency < 100) + lstat[type].counter[COUNTS_10e2]++; + else if (latency < 1000) + lstat[type].counter[COUNTS_10e3]++; + else if (latency < 
10000) + lstat[type].counter[COUNTS_10e4]++; + else if (latency < 100000) + lstat[type].counter[COUNTS_10e5]++; + else if (latency < 1000000) + lstat[type].counter[COUNTS_10e6]++; + else if (latency < 10000000) + lstat[type].counter[COUNTS_10e7]++; + else + lstat[type].counter[COUNTS_10e8_plus]++; + + min = lstat[type].counter[COUNTS_MIN]; + max = lstat[type].counter[COUNTS_MAX]; + lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency); + lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency); + lstat[type].counter[COUNTS_SUM] += latency; + lstat[type].samples++; + spin_unlock_irqrestore(&latency_lock, flags); +} + +static char *latency_counter_names[] = { + " <0.1us", + " 0.1us-1us", " 1us-10us", " 10us-100us", + " 100us-1ms", " 1ms-10ms", " >=10ms", + " min(us)", " max(us)", " average(us)" +}; + +static char *latency_type_names[] = { + " inv_iotlb", " inv_devtlb", " inv_iec", + " svm_prq" +}; + +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + struct latency_statistic *lstat = iommu->perf_statistic; + unsigned long flags; + int bytes = 0, i, j; + + memset(str, 0, size); + + for (i = 0; i < COUNTS_NUM; i++) + bytes += snprintf(str + bytes, size - bytes, + "%s", latency_counter_names[i]); + + spin_lock_irqsave(&latency_lock, flags); + for (i = 0; i < DMAR_LATENCY_NUM; i++) { + if (!dmar_latency_enabled(iommu, i)) + continue; + + bytes += snprintf(str + bytes, size - bytes, + "\n%s", latency_type_names[i]); + + for (j = 0; j < COUNTS_NUM; j++) { + u64 val = lstat[i].counter[j]; + + switch (j) { + case COUNTS_MIN: + if (val == UINT_MAX) + val = 0; + else + val /= 1000; + break; + case COUNTS_MAX: + val /= 1000; + break; + case COUNTS_SUM: + if (lstat[i].samples) + val /= (lstat[i].samples * 1000); + else + val = 0; + break; + default: + break; + } + + bytes += snprintf(str + bytes, size - bytes, + "%12lld", val); + } + } + spin_unlock_irqrestore(&latency_lock, flags); + + return bytes; +} diff --git 
a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h new file mode 100644 index 000000000000..fd6db8049d1a --- /dev/null +++ b/drivers/iommu/intel/perf.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * perf.h - performance monitor header + * + * Copyright (C) 2021 Intel Corporation + * + * Author: Lu Baolu + */ + +enum latency_type { + DMAR_LATENCY_INV_IOTLB = 0, + DMAR_LATENCY_INV_DEVTLB, + DMAR_LATENCY_INV_IEC, + DMAR_LATENCY_PRQ, + DMAR_LATENCY_NUM +}; + +enum latency_count { + COUNTS_10e2 = 0, /* < 0.1us */ + COUNTS_10e3, /* 0.1us ~ 1us */ + COUNTS_10e4, /* 1us ~ 10us */ + COUNTS_10e5, /* 10us ~ 100us */ + COUNTS_10e6, /* 100us ~ 1ms */ + COUNTS_10e7, /* 1ms ~ 10ms */ + COUNTS_10e8_plus, /* 10ms and plus*/ + COUNTS_MIN, + COUNTS_MAX, + COUNTS_SUM, + COUNTS_NUM +}; + +struct latency_statistic { + bool enabled; + u64 counter[COUNTS_NUM]; + u64 samples; +}; + +#ifdef CONFIG_DMAR_PERF +int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type); +bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type); +void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, + u64 latency); +int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size); +#else +static inline int +dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type) +{ + return -EINVAL; +} + +static inline void +dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type) +{ +} + +static inline bool +dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type) +{ + return false; +} + +static inline void +dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency) +{ +} + +static inline int +dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size) +{ + return 0; +} +#endif /* CONFIG_DMAR_PERF */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h 
index 98b04fa9373e..f5cf31dd7280 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -621,6 +621,7 @@ struct intel_iommu { u32 flags; /* Software defined flags */ struct dmar_drhd_unit *drhd; + void *perf_statistic; }; /* Per subdevice private data */ -- cgit v1.2.3 From 1f106ff0ea2782a6bc49bb927e4789681a2ec507 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:11 +0800 Subject: iommu/vt-d: Use bitfields for DMAR capabilities IOTLB device presence, iommu coherency and snooping are boolean capabilities. Use them as bits and keep them adjacent. Structure layout before the reorg. $ pahole -C dmar_domain drivers/iommu/intel/dmar.o struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ bool has_iotlb_device; /* 772 1 */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_coherency; /* 3148 4 */ int iommu_snooping; /* 3152 4 */ int iommu_count; /* 3156 4 */ int iommu_superpage; /* 3160 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3168 8 */ u32 default_pasid; /* 3176 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3184 72 */ /* size: 3256, cachelines: 51, members: 18 */ /* sum members: 3245, holes: 3, sum holes: 11 */ /* forced alignments: 1 */ /* last cacheline: 56 bytes */ } __attribute__((__aligned__(8))); After arranging it for natural padding and to make flags as u8 bits, it saves 8 bytes for the struct. 
struct dmar_domain { int nid; /* 0 4 */ unsigned int iommu_refcnt[128]; /* 4 512 */ /* --- cacheline 8 boundary (512 bytes) was 4 bytes ago --- */ u16 iommu_did[128]; /* 516 256 */ /* --- cacheline 12 boundary (768 bytes) was 4 bytes ago --- */ u8 has_iotlb_device:1; /* 772: 0 1 */ u8 iommu_coherency:1; /* 772: 1 1 */ u8 iommu_snooping:1; /* 772: 2 1 */ /* XXX 5 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ struct list_head devices; /* 776 16 */ struct list_head subdevices; /* 792 16 */ struct iova_domain iovad __attribute__((__aligned__(8))); /* 808 2320 */ /* --- cacheline 48 boundary (3072 bytes) was 56 bytes ago --- */ struct dma_pte * pgd; /* 3128 8 */ /* --- cacheline 49 boundary (3136 bytes) --- */ int gaw; /* 3136 4 */ int agaw; /* 3140 4 */ int flags; /* 3144 4 */ int iommu_count; /* 3148 4 */ int iommu_superpage; /* 3152 4 */ /* XXX 4 bytes hole, try to pack */ u64 max_addr; /* 3160 8 */ u32 default_pasid; /* 3168 4 */ /* XXX 4 bytes hole, try to pack */ struct iommu_domain domain; /* 3176 72 */ /* size: 3248, cachelines: 51, members: 18 */ /* sum members: 3236, holes: 3, sum holes: 11 */ /* sum bitfield members: 3 bits, bit holes: 1, sum bit holes: 5 bits */ /* forced alignments: 1 */ /* last cacheline: 48 bytes */ } __attribute__((__aligned__(8))); Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-20-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 +++++++++--------- include/linux/intel-iommu.h | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65458aee0d95..430ef2232d47 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -626,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) bool found = false; int 
i; - domain->iommu_coherency = 1; + domain->iommu_coherency = true; for_each_domain_iommu(i, domain) { found = true; if (!iommu_paging_structure_coherency(g_iommus[i])) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } @@ -642,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (!iommu_paging_structure_coherency(iommu)) { - domain->iommu_coherency = 0; + domain->iommu_coherency = false; break; } } rcu_read_unlock(); } -static int domain_update_iommu_snooping(struct intel_iommu *skip) +static bool domain_update_iommu_snooping(struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 1; + bool ret = true; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { @@ -666,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) */ if (!sm_supported(iommu) && !ecap_sc_support(iommu->ecap)) { - ret = 0; + ret = false; break; } } @@ -4506,8 +4506,8 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) adjust_width = guestwidth_to_adjustwidth(guest_width); domain->agaw = width_to_agaw(adjust_width); - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; + domain->iommu_coherency = false; + domain->iommu_snooping = false; domain->iommu_superpage = 0; domain->max_addr = 0; @@ -5131,7 +5131,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, static bool intel_iommu_capable(enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) - return domain_update_iommu_snooping(NULL) == 1; + return domain_update_iommu_snooping(NULL); if (cap == IOMMU_CAP_INTR_REMAP) return irq_remapping_enabled == 1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f5cf31dd7280..2621eff04c82 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -546,7 +546,10 @@ struct dmar_domain { * domain ids are 16 bit wide according * to 
VT-d spec, section 9.3 */ - bool has_iotlb_device; + u8 has_iotlb_device: 1; + u8 iommu_coherency: 1; /* indicate coherency of iommu access */ + u8 iommu_snooping: 1; /* indicate snooping control feature */ + struct list_head devices; /* all devices' list */ struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -558,9 +561,6 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ - - int iommu_coherency;/* indicate coherency of iommu access */ - int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, -- cgit v1.2.3 From 74f6d776ae0b8498cfdb574ab24992bd50a2a2f1 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:12 +0800 Subject: iommu/vt-d: Removed unused iommu_count in dmar domain DMAR domain uses per DMAR refcount. It is indexed by iommu seq_id. Older iommu_count is only incremented and decremented but no decisions are taken based on this refcount. This is not of much use. Hence, remove iommu_count and further simplify domain_detach_iommu() by returning void. 
Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-21-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 11 +++-------- include/linux/intel-iommu.h | 1 - 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 430ef2232d47..dd8ecfbfdb23 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1920,7 +1920,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] += 1; - domain->iommu_count += 1; if (domain->iommu_refcnt[iommu->seq_id] == 1) { ndomains = cap_ndoms(iommu->cap); num = find_first_zero_bit(iommu->domain_ids, ndomains); @@ -1928,7 +1927,6 @@ static int domain_attach_iommu(struct dmar_domain *domain, if (num >= ndomains) { pr_err("%s: No free domain ids\n", iommu->name); domain->iommu_refcnt[iommu->seq_id] -= 1; - domain->iommu_count -= 1; return -ENOSPC; } @@ -1944,16 +1942,15 @@ static int domain_attach_iommu(struct dmar_domain *domain, return 0; } -static int domain_detach_iommu(struct dmar_domain *domain, - struct intel_iommu *iommu) +static void domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) { - int num, count; + int num; assert_spin_locked(&device_domain_lock); assert_spin_locked(&iommu->lock); domain->iommu_refcnt[iommu->seq_id] -= 1; - count = --domain->iommu_count; if (domain->iommu_refcnt[iommu->seq_id] == 0) { num = domain->iommu_did[iommu->seq_id]; clear_bit(num, iommu->domain_ids); @@ -1962,8 +1959,6 @@ static int domain_detach_iommu(struct dmar_domain *domain, domain_update_iommu_cap(domain); domain->iommu_did[iommu->seq_id] = 0; } - - return count; } static inline int guestwidth_to_adjustwidth(int gaw) diff --git a/include/linux/intel-iommu.h 
b/include/linux/intel-iommu.h index 2621eff04c82..574b932dfe86 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -561,7 +561,6 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ - int iommu_count; /* reference count of iommu */ int iommu_superpage;/* Level of superpages supported: 0 == 4KiB (no superpages), 1 == 2MiB, 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ -- cgit v1.2.3 From 9739ba327c01e26f672661ea751132c29a54d3d9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 10 Jun 2021 10:01:14 +0800 Subject: iommu/vt-d: Define counter explicitly as unsigned int Avoid below checkpatch warning. WARNING: Prefer 'unsigned int' to bare use of 'unsigned' + unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; Fixes: 29a27719abaa ("iommu/vt-d: Replace iommu_bmp with a refcount") Signed-off-by: Parav Pandit Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210530075053.264218-1-parav@nvidia.com Link: https://lore.kernel.org/r/20210610020115.1637656-23-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 574b932dfe86..d0fa0b31994d 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -537,7 +537,7 @@ struct context_entry { struct dmar_domain { int nid; /* node id */ - unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED]; + unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED]; /* Refcount of devices per iommu */ -- cgit v1.2.3 From db59e1b6e49201beacdbd0622aa3594f2de4f727 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:56 +0200 Subject: ACPI: arm64: Move DMA setup operations out of IORT Extract generic DMA setup code out of IORT, so it can be reused by VIOT. Keep it in drivers/acpi/arm64 for now, since it could break x86 platforms that haven't run this code so far, if they have invalid tables. 
Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/dma.c | 50 +++++++++++++++++++++++++++++++++++++++++ drivers/acpi/arm64/iort.c | 54 +++++++-------------------------------------- drivers/acpi/scan.c | 2 +- include/linux/acpi.h | 3 +++ include/linux/acpi_iort.h | 6 ++--- 6 files changed, 66 insertions(+), 50 deletions(-) create mode 100644 drivers/acpi/arm64/dma.c (limited to 'include') diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 6ff50f4ed947..66acbe77f46e 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o +obj-y += dma.o diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c new file mode 100644 index 000000000000..f16739ad3cc0 --- /dev/null +++ b/drivers/acpi/arm64/dma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include + +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +{ + int ret; + u64 end, mask; + u64 dmaaddr = 0, size = 0, offset = 0; + + /* + * If @dev is expected to be DMA-capable then the bus code that created + * it should have initialised its dma_mask pointer by this point. For + * now, we'll continue the legacy behaviour of coercing it to the + * coherent mask if not, but we'll no longer do so quietly. 
+ */ + if (!dev->dma_mask) { + dev_warn(dev, "DMA mask not set\n"); + dev->dma_mask = &dev->coherent_dma_mask; + } + + if (dev->coherent_dma_mask) + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + else + size = 1ULL << 32; + + ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + if (ret == -ENODEV) + ret = iort_dma_get_ranges(dev, &size); + if (!ret) { + /* + * Limit coherent and dma mask based on size retrieved from + * firmware. + */ + end = dmaaddr + size - 1; + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } + + *dma_addr = dmaaddr; + *dma_size = size; + + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); +} diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3912a1f6058e..a940be1cf2af 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1144,56 +1144,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size) } /** - * iort_dma_setup() - Set-up device DMA parameters. + * iort_dma_get_ranges() - Look up DMA addressing limit for the device + * @dev: device to lookup + * @size: DMA range size result pointer * - * @dev: device to configure - * @dma_addr: device DMA address result pointer - * @dma_size: DMA range size result pointer + * Return: 0 on success, an error otherwise. */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +int iort_dma_get_ranges(struct device *dev, u64 *size) { - u64 end, mask, dmaaddr = 0, size = 0, offset = 0; - int ret; - - /* - * If @dev is expected to be DMA-capable then the bus code that created - * it should have initialised its dma_mask pointer by this point. For - * now, we'll continue the legacy behaviour of coercing it to the - * coherent mask if not, but we'll no longer do so quietly. 
- */ - if (!dev->dma_mask) { - dev_warn(dev, "DMA mask not set\n"); - dev->dma_mask = &dev->coherent_dma_mask; - } - - if (dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + if (dev_is_pci(dev)) + return rc_dma_get_range(dev, size); else - size = 1ULL << 32; - - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); - if (ret == -ENODEV) - ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size) - : nc_dma_get_range(dev, &size); - - if (!ret) { - /* - * Limit coherent and dma mask based on size retrieved from - * firmware. - */ - end = dmaaddr + size - 1; - mask = DMA_BIT_MASK(ilog2(end) + 1); - dev->bus_dma_limit = end; - dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); - *dev->dma_mask = min(*dev->dma_mask, mask); - } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); + return nc_dma_get_range(dev, size); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index e10d38ac7cf2..ea613df8f913 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1537,7 +1537,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, return 0; } - iort_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev, &dma_addr, &size); iommu = iort_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c60745f657e9..7aaa9559cc19 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -259,9 +259,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct 
acpi_srat_gicc_affinity *pa) { } +static inline void +acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 1a12baa58e40..f7f054833afd 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,7 +34,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); +int iort_dma_get_ranges(struct device *dev, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); @@ -48,8 +48,8 @@ static inline struct irq_domain *iort_get_device_domain( { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ -static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, - u64 *size) { } +static inline int iort_dma_get_ranges(struct device *dev, u64 *size) +{ return -ENODEV; } static inline const struct iommu_ops *iort_iommu_configure_id( struct device *dev, const u32 *id_in) { return NULL; } -- cgit v1.2.3 From 11a8c5e3a94b12848f24d9c63b5c175ce0b80729 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:57 +0200 Subject: ACPI: Move IOMMU setup code out of IORT Extract the code that sets up the IOMMU infrastructure from IORT, since it can be reused by VIOT. Move it one level up into a new acpi_iommu_configure_id() function, which calls the IORT parsing function which in turn calls the acpi_iommu_fwspec_init() helper. 
Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-3-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 74 +++++------------------------------------------ drivers/acpi/scan.c | 73 +++++++++++++++++++++++++++++++++++++++++++++- include/acpi/acpi_bus.h | 3 ++ include/linux/acpi_iort.h | 8 ++--- 4 files changed, 86 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index a940be1cf2af..487d1095030d 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev) return NULL; } -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return (fwspec && fwspec->ops) ? fwspec->ops : NULL; -} - -static inline int iort_add_device_replay(struct device *dev) -{ - int err = 0; - - if (dev->bus && !device_iommu_mapped(dev)) - err = iommu_probe_device(dev); - - return err; -} - /** * iort_iommu_msi_get_resv_regions - Reserved region driver helper * @dev: Device from iommu_get_resv_regions() @@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type) } } -static int arm_smmu_iort_xlate(struct device *dev, u32 streamid, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) -{ - int ret = iommu_fwspec_init(dev, fwnode, ops); - - if (!ret) - ret = iommu_fwspec_add_ids(dev, &streamid, 1); - - return ret; -} - static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) { struct acpi_iort_root_complex *pci_rc; @@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return iort_iommu_driver_enabled(node->type) ? 
-EPROBE_DEFER : -ENODEV; - return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); } struct iort_pci_alias_info { @@ -1020,24 +991,13 @@ static int iort_nc_iommu_map_id(struct device *dev, * @dev: device to configure * @id_in: optional input id const value pointer * - * Returns: iommu_ops pointer on configuration success - * NULL on configuration failure + * Returns: 0 on success, <0 on failure */ -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in) +int iort_iommu_configure_id(struct device *dev, const u32 *id_in) { struct acpi_iort_node *node; - const struct iommu_ops *ops; int err = -ENODEV; - /* - * If we already translated the fwspec there - * is nothing left to do, return the iommu_ops. - */ - ops = iort_fwspec_iommu_ops(dev); - if (ops) - return ops; - if (dev_is_pci(dev)) { struct iommu_fwspec *fwspec; struct pci_bus *bus = to_pci_dev(dev)->bus; @@ -1046,7 +1006,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, iort_match_node_callback, &bus->dev); if (!node) - return NULL; + return -ENODEV; info.node = node; err = pci_for_each_dma_alias(to_pci_dev(dev), @@ -1059,7 +1019,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, iort_match_node_callback, dev); if (!node) - return NULL; + return -ENODEV; err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) : iort_nc_iommu_map(dev, node); @@ -1068,32 +1028,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, iort_named_component_init(dev, node); } - /* - * If we have reason to believe the IOMMU driver missed the initial - * add_device callback for dev, replay it to get things in order. 
- */ - if (!err) { - ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(dev); - } - - /* Ignore all other errors apart from EPROBE_DEFER */ - if (err == -EPROBE_DEFER) { - ops = ERR_PTR(err); - } else if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - ops = NULL; - } - - return ops; + return err; } #else int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *input_id) -{ return NULL; } +int iort_iommu_configure_id(struct device *dev, const u32 *input_id) +{ return -ENODEV; } #endif static int nc_dma_get_range(struct device *dev, u64 *size) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ea613df8f913..2a2e690040e9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1520,6 +1521,76 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, return ret >= 0 ? 0 : ret; } +#ifdef CONFIG_IOMMU_API +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + int ret = iommu_fwspec_init(dev, fwnode, ops); + + if (!ret) + ret = iommu_fwspec_add_ids(dev, &id, 1); + + return ret; +} + +static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + return fwspec ? fwspec->ops : NULL; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + int err; + const struct iommu_ops *ops; + + /* + * If we already translated the fwspec there is nothing left to do, + * return the iommu_ops. 
+ */ + ops = acpi_iommu_fwspec_ops(dev); + if (ops) + return ops; + + err = iort_iommu_configure_id(dev, id_in); + + /* + * If we have reason to believe the IOMMU driver missed the initial + * iommu_probe_device() call for dev, replay it to get things in order. + */ + if (!err && dev->bus && !device_iommu_mapped(dev)) + err = iommu_probe_device(dev); + + /* Ignore all other errors apart from EPROBE_DEFER */ + if (err == -EPROBE_DEFER) { + return ERR_PTR(err); + } else if (err) { + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); + return NULL; + } + return acpi_iommu_fwspec_ops(dev); +} + +#else /* !CONFIG_IOMMU_API */ + +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops) +{ + return -ENODEV; +} + +static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, + const u32 *id_in) +{ + return NULL; +} + +#endif /* !CONFIG_IOMMU_API */ + /** * acpi_dma_configure_id - Set-up DMA configuration for the device. * @dev: The pointer to the device @@ -1539,7 +1610,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, acpi_arch_dma_setup(dev, &dma_addr, &size); - iommu = iort_iommu_configure_id(dev, input_id); + iommu = acpi_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 3a82faac5767..41f092a269f6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -588,6 +588,9 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); +int acpi_iommu_fwspec_init(struct device *dev, u32 id, + struct fwnode_handle *fwnode, + const struct iommu_ops *ops); int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, u64 *size); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 
f7f054833afd..f1f0842a2cb2 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -35,8 +35,7 @@ void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ int iort_dma_get_ranges(struct device *dev, u64 *size); -const struct iommu_ops *iort_iommu_configure_id(struct device *dev, - const u32 *id_in); +int iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else @@ -50,9 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline int iort_dma_get_ranges(struct device *dev, u64 *size) { return -ENODEV; } -static inline const struct iommu_ops *iort_iommu_configure_id( - struct device *dev, const u32 *id_in) -{ return NULL; } +static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in) +{ return -ENODEV; } static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } -- cgit v1.2.3 From 3cf485540e7b8550936ce3602edf2f58e4007304 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:58 +0200 Subject: ACPI: Add driver for the VIOT table The ACPI Virtual I/O Translation Table describes topology of para-virtual platforms, similarly to vendor tables DMAR, IVRS and IORT. For now it describes the relation between virtio-iommu and the endpoints it manages. Three steps are needed to configure DMA of endpoints: (1) acpi_viot_init(): parse the VIOT table, find or create the fwnode associated to each vIOMMU device. This needs to happen after acpi_scan_init(), because it relies on the struct device and their fwnode to be available. (2) When probing the vIOMMU device, the driver registers its IOMMU ops within the IOMMU subsystem. This step doesn't require any intervention from the VIOT driver. 
(3) viot_iommu_configure(): before binding the endpoint to a driver, find the associated IOMMU ops. Register them, along with the endpoint ID, into the device's iommu_fwspec. If step (3) happens before step (2), it is deferred until the IOMMU is initialized, then retried. Tested-by: Eric Auger Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20210618152059.1194210-4-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- MAINTAINERS | 8 + drivers/acpi/Kconfig | 3 + drivers/acpi/Makefile | 2 + drivers/acpi/bus.c | 2 + drivers/acpi/scan.c | 3 + drivers/acpi/viot.c | 366 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/iommu/Kconfig | 1 + include/linux/acpi_viot.h | 19 +++ 8 files changed, 404 insertions(+) create mode 100644 drivers/acpi/viot.c create mode 100644 include/linux/acpi_viot.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 503fd21901f1..9f06619f7e41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -431,6 +431,14 @@ W: https://01.org/linux-acpi B: https://bugzilla.kernel.org F: drivers/acpi/acpi_video.c +ACPI VIOT DRIVER +M: Jean-Philippe Brucker +L: linux-acpi@vger.kernel.org +L: iommu@lists.linux-foundation.org +S: Maintained +F: drivers/acpi/viot.c +F: include/linux/acpi_viot.h + ACPI WMI DRIVER L: platform-driver-x86@vger.kernel.org S: Orphan diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index eedec61e3476..3758c6940ed7 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -526,6 +526,9 @@ endif source "drivers/acpi/pmic/Kconfig" +config ACPI_VIOT + bool + endif # ACPI config X86_PM_TIMER diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 700b41adf2db..a6e644c48987 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -118,3 +118,5 @@ video-objs += acpi_video.o video_detect.o obj-y += dptf/ obj-$(CONFIG_ARM64) += arm64/ + +obj-$(CONFIG_ACPI_VIOT) += viot.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c 
index be7da23fad76..4a96b7097a62 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -27,6 +27,7 @@ #include #endif #include +#include #include #include #include @@ -1345,6 +1346,7 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); + acpi_viot_init(); return 0; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2a2e690040e9..3e2bb04ab528 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1556,6 +1557,8 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, return ops; err = iort_iommu_configure_id(dev, id_in); + if (err && err != -EPROBE_DEFER) + err = viot_iommu_configure(dev); /* * If we have reason to believe the IOMMU driver missed the initial diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c new file mode 100644 index 000000000000..d2256326c73a --- /dev/null +++ b/drivers/acpi/viot.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Virtual I/O topology + * + * The Virtual I/O Translation Table (VIOT) describes the topology of + * para-virtual IOMMUs and the endpoints they manage. The OS uses it to + * initialize devices in the right order, preventing endpoints from issuing DMA + * before their IOMMU is ready. + * + * When binding a driver to a device, before calling the device driver's probe() + * method, the driver infrastructure calls dma_configure(). At that point the + * VIOT driver looks for an IOMMU associated to the device in the VIOT table. + * If an IOMMU exists and has been initialized, the VIOT driver initializes the + * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU + * ops when the device driver configures DMA mappings. If an IOMMU exists and + * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing + * the device until the IOMMU is available. 
+ */ +#define pr_fmt(fmt) "ACPI: VIOT: " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct viot_iommu { + /* Node offset within the table */ + unsigned int offset; + struct fwnode_handle *fwnode; + struct list_head list; +}; + +struct viot_endpoint { + union { + /* PCI range */ + struct { + u16 segment_start; + u16 segment_end; + u16 bdf_start; + u16 bdf_end; + }; + /* MMIO */ + u64 address; + }; + u32 endpoint_id; + struct viot_iommu *viommu; + struct list_head list; +}; + +static struct acpi_table_viot *viot; +static LIST_HEAD(viot_iommus); +static LIST_HEAD(viot_pci_ranges); +static LIST_HEAD(viot_mmio_endpoints); + +static int __init viot_check_bounds(const struct acpi_viot_header *hdr) +{ + struct acpi_viot_header *start, *end, *hdr_end; + + start = ACPI_ADD_PTR(struct acpi_viot_header, viot, + max_t(size_t, sizeof(*viot), viot->node_offset)); + end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length); + hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr)); + + if (hdr < start || hdr_end > end) { + pr_err(FW_BUG "Node pointer overflows\n"); + return -EOVERFLOW; + } + if (hdr->length < sizeof(*hdr)) { + pr_err(FW_BUG "Empty node\n"); + return -EINVAL; + } + return 0; +} + +static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu, + u16 segment, u16 bdf) +{ + struct pci_dev *pdev; + struct fwnode_handle *fwnode; + + pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf), + bdf & 0xff); + if (!pdev) { + pr_err("Could not find PCI IOMMU\n"); + return -ENODEV; + } + + fwnode = pdev->dev.fwnode; + if (!fwnode) { + /* + * PCI devices aren't necessarily described by ACPI. Create a + * fwnode so the IOMMU subsystem can identify this device. 
+ */ + fwnode = acpi_alloc_fwnode_static(); + if (!fwnode) { + pci_dev_put(pdev); + return -ENOMEM; + } + set_primary_fwnode(&pdev->dev, fwnode); + } + viommu->fwnode = pdev->dev.fwnode; + pci_dev_put(pdev); + return 0; +} + +static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu, + u64 address) +{ + struct acpi_device *adev; + struct resource res = { + .start = address, + .end = address, + .flags = IORESOURCE_MEM, + }; + + adev = acpi_resource_consumer(&res); + if (!adev) { + pr_err("Could not find MMIO IOMMU\n"); + return -EINVAL; + } + viommu->fwnode = &adev->fwnode; + return 0; +} + +static struct viot_iommu * __init viot_get_iommu(unsigned int offset) +{ + int ret; + struct viot_iommu *viommu; + struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header, + viot, offset); + union { + struct acpi_viot_virtio_iommu_pci pci; + struct acpi_viot_virtio_iommu_mmio mmio; + } *node = (void *)hdr; + + list_for_each_entry(viommu, &viot_iommus, list) + if (viommu->offset == offset) + return viommu; + + if (viot_check_bounds(hdr)) + return NULL; + + viommu = kzalloc(sizeof(*viommu), GFP_KERNEL); + if (!viommu) + return NULL; + + viommu->offset = offset; + switch (hdr->type) { + case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI: + if (hdr->length < sizeof(node->pci)) + goto err_free; + + ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment, + node->pci.bdf); + break; + case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO: + if (hdr->length < sizeof(node->mmio)) + goto err_free; + + ret = viot_get_mmio_iommu_fwnode(viommu, + node->mmio.base_address); + break; + default: + ret = -EINVAL; + } + if (ret) + goto err_free; + + list_add(&viommu->list, &viot_iommus); + return viommu; + +err_free: + kfree(viommu); + return NULL; +} + +static int __init viot_parse_node(const struct acpi_viot_header *hdr) +{ + int ret = -EINVAL; + struct list_head *list; + struct viot_endpoint *ep; + union { + struct acpi_viot_mmio mmio; + struct acpi_viot_pci_range pci; + } *node = (void *)hdr; + + 
if (viot_check_bounds(hdr)) + return -EINVAL; + + if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI || + hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO) + return 0; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + switch (hdr->type) { + case ACPI_VIOT_NODE_PCI_RANGE: + if (hdr->length < sizeof(node->pci)) { + pr_err(FW_BUG "Invalid PCI node size\n"); + goto err_free; + } + + ep->segment_start = node->pci.segment_start; + ep->segment_end = node->pci.segment_end; + ep->bdf_start = node->pci.bdf_start; + ep->bdf_end = node->pci.bdf_end; + ep->endpoint_id = node->pci.endpoint_start; + ep->viommu = viot_get_iommu(node->pci.output_node); + list = &viot_pci_ranges; + break; + case ACPI_VIOT_NODE_MMIO: + if (hdr->length < sizeof(node->mmio)) { + pr_err(FW_BUG "Invalid MMIO node size\n"); + goto err_free; + } + + ep->address = node->mmio.base_address; + ep->endpoint_id = node->mmio.endpoint; + ep->viommu = viot_get_iommu(node->mmio.output_node); + list = &viot_mmio_endpoints; + break; + default: + pr_warn("Unsupported node %x\n", hdr->type); + ret = 0; + goto err_free; + } + + if (!ep->viommu) { + pr_warn("No IOMMU node found\n"); + /* + * A future version of the table may use the node for other + * purposes. Keep parsing. + */ + ret = 0; + goto err_free; + } + + list_add(&ep->list, list); + return 0; + +err_free: + kfree(ep); + return ret; +} + +/** + * acpi_viot_init - Parse the VIOT table + * + * Parse the VIOT table, prepare the list of endpoints to be used during DMA + * setup of devices. 
+ */ +void __init acpi_viot_init(void) +{ + int i; + acpi_status status; + struct acpi_table_header *hdr; + struct acpi_viot_header *node; + + status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr); + if (ACPI_FAILURE(status)) { + if (status != AE_NOT_FOUND) { + const char *msg = acpi_format_exception(status); + + pr_err("Failed to get table, %s\n", msg); + } + return; + } + + viot = (void *)hdr; + + node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset); + for (i = 0; i < viot->node_count; i++) { + if (viot_parse_node(node)) + return; + + node = ACPI_ADD_PTR(struct acpi_viot_header, node, + node->length); + } + + acpi_put_table(hdr); +} + +static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, + u32 epid) +{ + const struct iommu_ops *ops; + + if (!viommu) + return -ENODEV; + + /* We're not translating ourself */ + if (viommu->fwnode == dev->fwnode) + return -EINVAL; + + ops = iommu_ops_from_fwnode(viommu->fwnode); + if (!ops) + return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? + -EPROBE_DEFER : -ENODEV; + + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); +} + +static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) +{ + u32 epid; + struct viot_endpoint *ep; + u32 domain_nr = pci_domain_nr(pdev->bus); + + list_for_each_entry(ep, &viot_pci_ranges, list) { + if (domain_nr >= ep->segment_start && + domain_nr <= ep->segment_end && + dev_id >= ep->bdf_start && + dev_id <= ep->bdf_end) { + epid = ((domain_nr - ep->segment_start) << 16) + + dev_id - ep->bdf_start + ep->endpoint_id; + + /* + * If we found a PCI range managed by the viommu, we're + * the one that has to request ACS. 
+ */ + pci_request_acs(); + + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + epid); + } + } + return -ENODEV; +} + +static int viot_mmio_dev_iommu_init(struct platform_device *pdev) +{ + struct resource *mem; + struct viot_endpoint *ep; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) + return -ENODEV; + + list_for_each_entry(ep, &viot_mmio_endpoints, list) { + if (ep->address == mem->start) + return viot_dev_iommu_init(&pdev->dev, ep->viommu, + ep->endpoint_id); + } + return -ENODEV; +} + +/** + * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT + * @dev: the endpoint + * + * Return: 0 on success, <0 on failure + */ +int viot_iommu_configure(struct device *dev) +{ + if (dev_is_pci(dev)) + return pci_for_each_dma_alias(to_pci_dev(dev), + viot_pci_dev_iommu_init, NULL); + else if (dev_is_platform(dev)) + return viot_mmio_dev_iommu_init(to_platform_device(dev)); + return -ENODEV; +} diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 1f111b399bca..aff8a4830dd1 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -403,6 +403,7 @@ config VIRTIO_IOMMU depends on ARM64 select IOMMU_API select INTERVAL_TREE + select ACPI_VIOT if ACPI help Para-virtualised IOMMU driver with virtio. 
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h new file mode 100644 index 000000000000..1eb8ee5b0e5f --- /dev/null +++ b/include/linux/acpi_viot.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_VIOT_H__ +#define __ACPI_VIOT_H__ + +#include + +#ifdef CONFIG_ACPI_VIOT +void __init acpi_viot_init(void); +int viot_iommu_configure(struct device *dev); +#else +static inline void acpi_viot_init(void) {} +static inline int viot_iommu_configure(struct device *dev) +{ + return -ENODEV; +} +#endif + +#endif /* __ACPI_VIOT_H__ */ -- cgit v1.2.3 From ac6d704679d343e55615551f19e9b2e18d68518b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:59 +0200 Subject: iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops() Passing a 64-bit address width to iommu_setup_dma_ops() is valid on virtual platforms, but isn't currently possible. The overflow check in iommu_dma_init_domain() prevents this even when @dma_base isn't 0. Pass a limit address instead of a size, so callers don't have to fake a size to work around the check. The base and limit parameters are being phased out, because: * they are redundant for x86 callers. dma-iommu already reserves the first page, and the upper limit is already in domain->geometry. * they can now be obtained from dev->dma_range_map on Arm. But removing them on Arm isn't completely straightforward so is left for future work. As an intermediate step, simplify the x86 callers by passing dummy limits. 
Signed-off-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-5-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 +- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/dma-iommu.c | 12 ++++++------ drivers/iommu/intel/iommu.c | 5 +---- include/linux/dma-iommu.h | 4 ++-- 5 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4bf1dd3eb041..6719f9efea09 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->dma_coherent = coherent; if (iommu) - iommu_setup_dma_ops(dev, dma_base, size); + iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); #ifdef CONFIG_XEN if (xen_swiotlb_detect()) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3ac42bbdefc6..216323fb27ef 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1713,7 +1713,7 @@ static void amd_iommu_probe_finalize(struct device *dev) /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7bcdd1205535..c62e19bed302 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -319,16 +319,16 @@ static bool dev_is_untrusted(struct device *dev) * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts - * @size: Size of IOVA space + * @limit: Last address of the IOVA space * @dev: Device the domain is being 
initialised for * - * @base and @size should be exact multiples of IOMMU page granularity to + * @base and @limit + 1 should be exact multiples of IOMMU page granularity to * avoid rounding surprises. If necessary, we reserve the page at address 0 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but * any change which could make prior IOVAs invalid will fail. */ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, - u64 size, struct device *dev) + dma_addr_t limit, struct device *dev) { struct iommu_dma_cookie *cookie = domain->iova_cookie; unsigned long order, base_pfn; @@ -346,7 +346,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { if (base > domain->geometry.aperture_end || - base + size <= domain->geometry.aperture_start) { + limit < domain->geometry.aperture_start) { pr_warn("specified DMA range outside IOMMU capability\n"); return -EFAULT; } @@ -1308,7 +1308,7 @@ static const struct dma_map_ops iommu_dma_ops = { * The IOMMU core code allocates the default DMA domain, which the underlying * IOMMU driver needs to support via the dma-iommu layer. */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1320,7 +1320,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size) * underlying IOMMU driver needs to support via the dma-iommu layer. 
*/ if (domain->type == IOMMU_DOMAIN_DMA) { - if (iommu_dma_init_domain(domain, dma_base, size, dev)) + if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) goto out_err; dev->dma_ops = &iommu_dma_ops; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index be35284a2016..2f7213f0e7a1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5165,13 +5165,10 @@ static void intel_iommu_release_device(struct device *dev) static void intel_iommu_probe_finalize(struct device *dev) { - dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct dmar_domain *dmar_domain = to_dmar_domain(domain); if (domain && domain->type == IOMMU_DOMAIN_DMA) - iommu_setup_dma_ops(dev, base, - __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base); + iommu_setup_dma_ops(dev, 0, U64_MAX); else set_dma_ops(dev, NULL); } diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 6e75a2d689b4..758ca4694257 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size); +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); /* The DMA API isn't _quite_ the whole story, though... */ /* @@ -50,7 +50,7 @@ struct msi_msg; struct device; static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size) + u64 dma_limit) { } -- cgit v1.2.3