diff options
Diffstat (limited to 'drivers/iommu/arm')
-rw-r--r-- | drivers/iommu/arm/Kconfig | 144 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 140 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 110 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 2 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 554 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 141 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 537 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 5 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 11 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 157 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 3 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.c | 74 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.h | 2 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/qcom_iommu.c | 10 |
14 files changed, 1424 insertions, 466 deletions
diff --git a/drivers/iommu/arm/Kconfig b/drivers/iommu/arm/Kconfig new file mode 100644 index 000000000000..ef42bbe07dbe --- /dev/null +++ b/drivers/iommu/arm/Kconfig @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-2.0-only +# ARM IOMMU support +config ARM_SMMU + tristate "ARM Ltd. System MMU (SMMU) Support" + depends on ARM64 || ARM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU if ARM + help + Support for implementations of the ARM System MMU architecture + versions 1 and 2. + + Say Y here if your SoC includes an IOMMU device implementing + the ARM SMMU architecture. + +if ARM_SMMU +config ARM_SMMU_LEGACY_DT_BINDINGS + bool "Support the legacy \"mmu-masters\" devicetree bindings" + depends on ARM_SMMU=y && OF + help + Support for the badly designed and deprecated "mmu-masters" + devicetree bindings. This allows some DMA masters to attach + to the SMMU but does not provide any support via the DMA API. + If you're lucky, you might be able to get VFIO up and running. + + If you say Y here then you'll make me very sad. Instead, say N + and move your firmware to the utopian future that was 2016. + +config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT + bool "Disable unmatched stream bypass by default" if EXPERT + default y + help + If your firmware is broken and fails to describe StreamIDs which + Linux should know about in order to manage the SMMU correctly and + securely, and you don't want to boot with the 'arm-smmu.disable_bypass=0' + command line parameter, then as a last resort you can turn it off + by default here. But don't. This option may be removed at any time. + + Note that 'arm-smmu.disable_bypass=1' will still take precedence. + +config ARM_SMMU_MMU_500_CPRE_ERRATA + bool "Enable errata workaround for CPRE in SMMU reset path" + default y + help + Say Y here (by default) to apply workaround to disable + MMU-500's next-page prefetcher for sake of 4 known errata. + + Say N here only when it is sure that any errata related to + prefetch enablement are not applicable on the platform. + Refer silicon-errata.rst for info on errata IDs. + +config ARM_SMMU_QCOM + def_tristate y + depends on ARCH_QCOM + select QCOM_SCM + help + When running on a Qualcomm platform that has the custom variant + of the ARM SMMU, this needs to be built into the SMMU driver. + +config ARM_SMMU_QCOM_DEBUG + bool "ARM SMMU QCOM implementation defined debug support" + depends on ARM_SMMU_QCOM=y + help + Support for implementation specific debug features in ARM SMMU + hardware found in QTI platforms. This include support for + the Translation Buffer Units (TBU) that can be used to obtain + additional information when debugging memory management issues + like context faults. + + Say Y here to enable debug for issues such as context faults + or TLB sync timeouts which requires implementation defined + register dumps. +endif + +config ARM_SMMU_V3 + tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support" + depends on ARM64 + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD + help + Support for implementations of the ARM System MMU architecture + version 3 providing translation support to a PCIe root complex. + + Say Y here if your system includes an IOMMU device implementing + the ARM SMMUv3 architecture. + +if ARM_SMMU_V3 +config ARM_SMMU_V3_SVA + bool "Shared Virtual Addressing support for the ARM SMMUv3" + select IOMMU_SVA + select IOMMU_IOPF + select MMU_NOTIFIER + help + Support for sharing process address spaces with devices using the + SMMUv3. + + Say Y here if your system supports SVA extensions such as PCIe PASID + and PRI. + +config ARM_SMMU_V3_IOMMUFD + bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)" + depends on IOMMUFD + help + Support for IOMMUFD features intended to support virtual machines + with accelerated virtual IOMMUs. + + Say Y here if you are doing development and testing on this feature. + +config ARM_SMMU_V3_KUNIT_TEST + tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on ARM_SMMU_V3_SVA + default KUNIT_ALL_TESTS + help + Enable this option to unit-test arm-smmu-v3 driver functions. + + If unsure, say N. + +config TEGRA241_CMDQV + bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" + depends on ACPI + help + Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The + CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues + support, except with virtualization capabilities. + + Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same + CMDQ-V extension. +endif + +config QCOM_IOMMU + # Note: iommu drivers cannot (yet?) be built as modules + bool "Qualcomm IOMMU Support" + depends on ARCH_QCOM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + select QCOM_SCM + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU + help + Support for IOMMU on certain Qualcomm SoCs. diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 6cc14d82399f..8cd8929bbfdf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -7,13 +7,22 @@ #include "arm-smmu-v3.h" -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops; struct iommu_hw_info_arm_smmuv3 *info; u32 __iomem *base_idr; unsigned int i; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) { + if (!impl_ops || !impl_ops->hw_info) + return ERR_PTR(-EOPNOTSUPP); + return impl_ops->hw_info(master->smmu, length, type); + } + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); @@ -43,6 +52,8 @@ static void arm_smmu_make_nested_cd_table_ste( target->data[0] |= nested_domain->ste[0] & ~cpu_to_le64(STRTAB_STE_0_CFG); target->data[1] |= nested_domain->ste[1]; + /* Merge events for DoS mitigations on eventq */ + target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV); } /* @@ -85,6 +96,47 @@ static void arm_smmu_make_nested_domain_ste( } } +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + struct arm_smmu_vmaster *vmaster; + unsigned long vsid; + int ret; + + iommu_group_mutex_assert(state->master->dev); + + ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core, + state->master->dev, &vsid); + if (ret) + return ret; + + vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); + if (!vmaster) + return -ENOMEM; + vmaster->vsmmu = nested_domain->vsmmu; + vmaster->vsid = vsid; + state->vmaster = vmaster; + + return 0; +} + +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + mutex_lock(&master->smmu->streams_mutex); + kfree(master->vmaster); + master->vmaster = state->vmaster; + mutex_unlock(&master->smmu->streams_mutex); +} + +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ + struct arm_smmu_attach_state state = { .master = master }; + + arm_smmu_attach_commit_vmaster(&state); +} + static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, struct device *dev) { @@ -173,23 +225,17 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg, return 0; } -static struct iommu_domain * +struct iommu_domain * arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data) { struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); - const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; struct arm_smmu_nested_domain *nested_domain; struct iommu_hwpt_arm_smmuv3 arg; bool enable_ats = false; int ret; - /* - * Faults delivered to the nested domain are faults that originated by - * the S1 in the domain. The core code will match all PASIDs when - * delivering the fault due to user_pasid_table - */ - if (flags & ~SUPPORTED_FLAGS) + if (flags) return ERR_PTR(-EOPNOTSUPP); ret = iommu_copy_struct_from_user(&arg, user_data, @@ -290,8 +336,8 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu, return 0; } -static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, - struct iommu_user_data_array *array) +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array) { struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); struct arm_smmu_device *smmu = vsmmu->smmu; @@ -345,25 +391,14 @@ static const struct iommufd_viommu_ops arm_vsmmu_ops = { .cache_invalidate = arm_vsmmu_cache_invalidate, }; -struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, - struct iommu_domain *parent, - struct iommufd_ctx *ictx, - unsigned int viommu_type) +size_t arm_smmu_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type) { - struct arm_smmu_device *smmu = - iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *s2_parent = to_smmu_domain(parent); - struct arm_vsmmu *vsmmu; - - if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3) - return ERR_PTR(-EOPNOTSUPP); + struct arm_smmu_device *smmu = master->smmu; if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) - return ERR_PTR(-EOPNOTSUPP); - - if (s2_parent->smmu != master->smmu) - return ERR_PTR(-EINVAL); + return 0; /* * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW @@ -371,7 +406,7 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, * any change to remove this. */ if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) - return ERR_PTR(-EOPNOTSUPP); + return 0; /* * Must support some way to prevent the VM from bypassing the cache @@ -383,19 +418,56 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, */ if (!arm_smmu_master_canwbs(master) && !(smmu->features & ARM_SMMU_FEAT_S2FWB)) - return ERR_PTR(-EOPNOTSUPP); + return 0; + + if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) + return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core); + + if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size) + return 0; + return smmu->impl_ops->get_viommu_size(viommu_type); +} + +int arm_vsmmu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + struct arm_smmu_device *smmu = + container_of(viommu->iommu_dev, struct arm_smmu_device, iommu); + struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain); - vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, - &arm_vsmmu_ops); - if (IS_ERR(vsmmu)) - return ERR_CAST(vsmmu); + if (s2_parent->smmu != smmu) + return -EINVAL; vsmmu->smmu = smmu; vsmmu->s2_parent = s2_parent; /* FIXME Move VMID allocation from the S2 domain allocation to here */ vsmmu->vmid = s2_parent->s2_cfg.vmid; - return &vsmmu->core; + if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) { + viommu->ops = &arm_vsmmu_ops; + return 0; + } + + return smmu->impl_ops->vsmmu_init(vsmmu, user_data); +} + +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt) +{ + struct iommu_vevent_arm_smmuv3 vevt; + int i; + + lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex); + + vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) | + FIELD_PREP(EVTQ_0_SID, vmaster->vsid)); + for (i = 1; i < EVTQ_ENT_DWORDS; i++) + vevt.evt[i] = cpu_to_le64(evt[i]); + + return iommufd_viommu_report_event(&vmaster->vsmmu->core, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt, + sizeof(vevt)); } -MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 1d3e71569775..59a480974d80 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,8 +13,6 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -static DEFINE_MUTEX(sva_lock); - static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { @@ -112,6 +110,15 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, * from the current CPU register */ target->data[3] = cpu_to_le64(read_sysreg(mair_el1)); + + /* + * Note that we don't bother with S1PIE on the SMMU, we just rely on + * our default encoding scheme matching direct permissions anyway. + * SMMU has no notion of S1POE nor GCS, so make sure that is clear if + * either is enabled for CPUs, just in case anyone imagines otherwise. + */ + if (system_supports_poe() || system_supports_gcs()) + dev_warn_once(master->smmu->dev, "SVA devices ignore permission overlays and GCS\n"); } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); @@ -206,8 +213,15 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) unsigned long asid_bits; u32 feat_mask = ARM_SMMU_FEAT_COHERENCY; - if (vabits_actual == 52) + if (vabits_actual == 52) { + /* We don't support LPA2 */ + if (PAGE_SIZE != SZ_64K) + return false; feat_mask |= ARM_SMMU_FEAT_VAX; + } + + if (system_supports_bbml2_noabort()) + feat_mask |= ARM_SMMU_FEAT_BBML2; if ((smmu->features & feat_mask) != feat_mask) return false; @@ -244,84 +258,6 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return true; } -bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) -{ - /* We're not keeping track of SIDs in fault events */ - if (master->num_streams != 1) - return false; - - return master->stall_enabled; -} - -bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) -{ - if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) - return false; - - /* SSID support is mandatory for the moment */ - return master->ssid_bits; -} - -bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) -{ - bool enabled; - - mutex_lock(&sva_lock); - enabled = master->sva_enabled; - mutex_unlock(&sva_lock); - return enabled; -} - -static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) -{ - struct device *dev = master->dev; - - /* - * Drivers for devices supporting PRI or stall should enable IOPF first. - * Others have device-specific fault handlers and don't need IOPF. - */ - if (!arm_smmu_master_iopf_supported(master)) - return 0; - - if (!master->iopf_enabled) - return -EINVAL; - - return iopf_queue_add_device(master->smmu->evtq.iopf, dev); -} - -static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) -{ - struct device *dev = master->dev; - - if (!master->iopf_enabled) - return; - - iopf_queue_remove_device(master->smmu->evtq.iopf, dev); -} - -int arm_smmu_master_enable_sva(struct arm_smmu_master *master) -{ - int ret; - - mutex_lock(&sva_lock); - ret = arm_smmu_master_sva_enable_iopf(master); - if (!ret) - master->sva_enabled = true; - mutex_unlock(&sva_lock); - - return ret; -} - -int arm_smmu_master_disable_sva(struct arm_smmu_master *master) -{ - mutex_lock(&sva_lock); - arm_smmu_master_sva_disable_iopf(master); - master->sva_enabled = false; - mutex_unlock(&sva_lock); - - return 0; -} - void arm_smmu_sva_notifier_synchronize(void) { /* @@ -340,6 +276,9 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct arm_smmu_cd target; int ret; + if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) + return -EOPNOTSUPP; + /* Prevent arm_smmu_mm_release from being called while we are attaching */ if (!mmget_not_zero(domain->mm)) return -EINVAL; @@ -393,11 +332,20 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, u32 asid; int ret; + if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) + return ERR_PTR(-EOPNOTSUPP); + smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 84baa021370a..d2671bfd3798 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -607,6 +607,6 @@ static struct kunit_suite arm_smmu_v3_test_module = { }; kunit_test_suites(&arm_smmu_v3_test_module); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); MODULE_DESCRIPTION("KUnit tests for arm-smmu-v3 driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index e4ebd9e12ad4..2a8b46b948f0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -26,6 +26,7 @@ #include <linux/pci.h> #include <linux/pci-ats.h> #include <linux/platform_device.h> +#include <linux/string_choices.h> #include <kunit/visibility.h> #include <uapi/linux/iommufd.h> @@ -37,7 +38,7 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); -static struct iommu_ops arm_smmu_ops; +static const struct iommu_ops arm_smmu_ops; static struct iommu_dirty_ops arm_smmu_dirty_ops; enum arm_smmu_msi_index { @@ -83,8 +84,28 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { { 0, NULL}, }; -static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu, u32 flags); +static const char * const event_str[] = { + [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID", + [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH", + [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE", + [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED", + [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID", + [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH", + [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD", + [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION", + [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE", + [EVT_ID_ACCESS_FAULT] = "F_ACCESS", + [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION", + [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH", +}; + +static const char * const event_class_str[] = { + [0] = "CD fetch", + [1] = "Stage 1 translation table fetch", + [2] = "Input address caused fault", + [3] = "Reserved", +}; + static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -1031,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | - STRTAB_STE_1_EATS); + STRTAB_STE_1_EATS | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); /* @@ -1047,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) if (cfg & BIT(1)) { used_bits[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | - STRTAB_STE_1_SHCFG); + STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | @@ -1759,17 +1780,53 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid) } /* IRQ and event handlers */ -static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) +static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) +{ + struct arm_smmu_master *master; + + event->id = FIELD_GET(EVTQ_0_ID, raw[0]); + event->sid = FIELD_GET(EVTQ_0_SID, raw[0]); + event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]); + event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID; + event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]); + event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]); + event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]); + event->read = FIELD_GET(EVTQ_1_RnW, raw[1]); + event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]); + event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]); + event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]); + event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]); + event->ipa = raw[3] & EVTQ_3_IPA; + event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR; + event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]); + event->class_tt = false; + event->dev = NULL; + + if (event->id == EVT_ID_PERMISSION_FAULT) + event->class_tt = (event->class == EVTQ_1_CLASS_TT); + + mutex_lock(&smmu->streams_mutex); + master = arm_smmu_find_master(smmu, event->sid); + if (master) + event->dev = get_device(master->dev); + mutex_unlock(&smmu->streams_mutex); +} + +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; struct arm_smmu_master *master; - bool ssid_valid = evt[0] & EVTQ_0_SSV; - u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]); struct iopf_fault fault_evt = { }; struct iommu_fault *flt = &fault_evt.fault; - switch (FIELD_GET(EVTQ_0_ID, evt[0])) { + switch (event->id) { + case EVT_ID_BAD_STE_CONFIG: + case EVT_ID_STREAM_DISABLED_FAULT: + case EVT_ID_BAD_SUBSTREAMID_CONFIG: + case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ -1779,69 +1836,126 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt) return -EOPNOTSUPP; } - if (!(evt[1] & EVTQ_1_STALL)) - return -EOPNOTSUPP; - - if (evt[1] & EVTQ_1_RnW) - perm |= IOMMU_FAULT_PERM_READ; - else - perm |= IOMMU_FAULT_PERM_WRITE; + if (event->stall) { + if (event->read) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; - if (evt[1] & EVTQ_1_InD) - perm |= IOMMU_FAULT_PERM_EXEC; + if (event->instruction) + perm |= IOMMU_FAULT_PERM_EXEC; - if (evt[1] & EVTQ_1_PnU) - perm |= IOMMU_FAULT_PERM_PRIV; + if (event->privileged) + perm |= IOMMU_FAULT_PERM_PRIV; - flt->type = IOMMU_FAULT_PAGE_REQ; - flt->prm = (struct iommu_fault_page_request) { - .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]), - .perm = perm, - .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]), - }; + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request){ + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = event->stag, + .perm = perm, + .addr = event->iova, + }; - if (ssid_valid) { - flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]); + if (event->ssv) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = event->ssid; + } } mutex_lock(&smmu->streams_mutex); - master = arm_smmu_find_master(smmu, sid); + master = arm_smmu_find_master(smmu, event->sid); if (!master) { ret = -EINVAL; goto out_unlock; } - ret = iommu_report_device_fault(master->dev, &fault_evt); + if (event->stall) + ret = iommu_report_device_fault(master->dev, &fault_evt); + else if (master->vmaster && !event->s2) + ret = arm_vmaster_report_event(master->vmaster, evt); + else + ret = -EOPNOTSUPP; /* Unhandled events should be pinned */ out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; } +static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *event) +{ + int i; + + dev_err(smmu->dev, "event 0x%02x received:\n", event->id); + + for (i = 0; i < EVTQ_ENT_DWORDS; ++i) + dev_err(smmu->dev, "\t0x%016llx\n", raw[i]); +} + +#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id]) +#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN" +#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? dev_name((e)->dev) : "(unassigned sid)" + +static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw, + struct arm_smmu_event *evt, + struct ratelimit_state *rs) +{ + if (!__ratelimit(rs)) + return; + + arm_smmu_dump_raw_event(smmu, raw, evt); + + switch (evt->id) { + case EVT_ID_TRANSLATION_FAULT: + case EVT_ID_ADDR_SIZE_FAULT: + case EVT_ID_ACCESS_FAULT: + case EVT_ID_PERMISSION_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->iova, evt->ipa); + + dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x", + evt->privileged ? "priv" : "unpriv", + evt->instruction ? "inst" : "data", + str_read_write(evt->read), + evt->s2 ? "s2" : "s1", event_class_str[evt->class], + evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "", + evt->stall ? " stall" : "", evt->stag); + + break; + + case EVT_ID_STE_FETCH_FAULT: + case EVT_ID_CD_FETCH_FAULT: + case EVT_ID_VMS_FETCH_FAULT: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid, evt->fetch_addr); + + break; + + default: + dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x", + ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt), + evt->sid, evt->ssid); + } +} + static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) { - int i, ret; + u64 evt[EVTQ_ENT_DWORDS]; + struct arm_smmu_event event = {0}; struct arm_smmu_device *smmu = dev; struct arm_smmu_queue *q = &smmu->evtq.q; struct arm_smmu_ll_queue *llq = &q->llq; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - u64 evt[EVTQ_ENT_DWORDS]; do { while (!queue_remove_raw(q, evt)) { - u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); - - ret = arm_smmu_handle_evt(smmu, evt); - if (!ret || !__ratelimit(&rs)) - continue; - - dev_info(smmu->dev, "event 0x%02x received:\n", id); - for (i = 0; i < ARRAY_SIZE(evt); ++i) - dev_info(smmu->dev, "\t0x%016llx\n", - (unsigned long long)evt[i]); + arm_smmu_decode_event(smmu, evt, &event); + if (arm_smmu_handle_event(smmu, evt, &event)) + arm_smmu_dump_event(smmu, evt, &event, &rs); + put_device(event.dev); cond_resched(); } @@ -2353,39 +2467,12 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) if (!smmu_domain) return ERR_PTR(-ENOMEM); - mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); return smmu_domain; } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) -{ - struct arm_smmu_domain *smmu_domain; - - /* - * Allocate the domain and initialise some of its data structures. - * We can't really do anything meaningful until we've added a - * master. - */ - smmu_domain = arm_smmu_domain_alloc(); - if (IS_ERR(smmu_domain)) - return ERR_CAST(smmu_domain); - - if (dev) { - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - int ret; - - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); - if (ret) { - kfree(smmu_domain); - return ERR_PTR(ret); - } - } - return &smmu_domain->domain; -} - static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2451,12 +2538,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_domain *smmu_domain); bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; - /* Restrict the stage to what we can actually support */ - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S2; - if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - pgtbl_cfg = (struct io_pgtable_cfg) { .pgsize_bitmap = smmu->pgsize_bitmap, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, @@ -2639,6 +2720,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, + struct iommu_domain *domain, struct arm_smmu_master *master, ioasid_t ssid, bool nested_ats_flush) { @@ -2649,6 +2731,7 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { if (master_domain->master == master && + master_domain->domain == domain && master_domain->ssid == ssid && master_domain->nested_ats_flush == nested_ats_flush) return master_domain; @@ -2675,6 +2758,58 @@ to_smmu_domain_devices(struct iommu_domain *domain) return NULL; } +static int arm_smmu_enable_iopf(struct arm_smmu_master *master, + struct arm_smmu_master_domain *master_domain) +{ + int ret; + + iommu_group_mutex_assert(master->dev); + + if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA)) + return -EOPNOTSUPP; + + /* + * Drivers for devices supporting PRI or stall require iopf others have + * device-specific fault handlers and don't need IOPF, so this is not a + * failure. + */ + if (!master->stall_enabled) + return 0; + + /* We're not keeping track of SIDs in fault events */ + if (master->num_streams != 1) + return -EOPNOTSUPP; + + if (master->iopf_refcount) { + master->iopf_refcount++; + master_domain->using_iopf = true; + return 0; + } + + ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev); + if (ret) + return ret; + master->iopf_refcount = 1; + master_domain->using_iopf = true; + return 0; +} + +static void arm_smmu_disable_iopf(struct arm_smmu_master *master, + struct arm_smmu_master_domain *master_domain) +{ + iommu_group_mutex_assert(master->dev); + + if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA)) + return; + + if (!master_domain || !master_domain->using_iopf) + return; + + master->iopf_refcount--; + if (master->iopf_refcount == 0) + iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev); +} + static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, struct iommu_domain *domain, ioasid_t ssid) @@ -2691,15 +2826,17 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, - nested_ats_flush); + master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master, + ssid, nested_ats_flush); if (master_domain) { list_del(&master_domain->devices_elm); - kfree(master_domain); if (master->ats_enabled) atomic_dec(&smmu_domain->nr_ats_masters); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_disable_iopf(master, master_domain); + kfree(master_domain); } /* @@ -2730,6 +2867,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(new_domain); unsigned long flags; + int ret; /* * arm_smmu_share_asid() must not see two domains pointing to the same @@ -2745,24 +2883,45 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * Translation Requests and Translated transactions are denied * as though ATS is disabled for the stream (STE.EATS == 0b00), * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events - * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be - * enabled if we have arm_smmu_domain, those always have page - * tables. + * (IHI0070Ea 5.2 Stream Table Entry). + * + * However, if we have installed a CD table and are using S1DSS + * then ATS will work in S1DSS bypass. See "13.6.4 Full ATS + * skipping stage 1". + * + * Disable ATS if we are going to create a normal 0b100 bypass + * STE. */ state->ats_enabled = !state->disable_ats && arm_smmu_ats_supported(master); } if (smmu_domain) { + if (new_domain->type == IOMMU_DOMAIN_NESTED) { + ret = arm_smmu_attach_prepare_vmaster( + state, to_smmu_nested_domain(new_domain)); + if (ret) + return ret; + } + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); - if (!master_domain) - return -ENOMEM; + if (!master_domain) { + ret = -ENOMEM; + goto err_free_vmaster; + } + master_domain->domain = new_domain; master_domain->master = master; master_domain->ssid = state->ssid; if (new_domain->type == IOMMU_DOMAIN_NESTED) master_domain->nested_ats_flush = to_smmu_nested_domain(new_domain)->enable_ats; + if (new_domain->iopf_handler) { + ret = arm_smmu_enable_iopf(master, master_domain); + if (ret) + goto err_free_master_domain; + } + /* * During prepare we want the current smmu_domain and new * smmu_domain to be in the devices list before we change any @@ -2782,8 +2941,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, !arm_smmu_master_canwbs(master)) { spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - kfree(master_domain); - return -EINVAL; + ret = -EINVAL; + goto err_iopf; } if (state->ats_enabled) @@ -2802,6 +2961,14 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, wmb(); } return 0; + +err_iopf: + arm_smmu_disable_iopf(master, master_domain); +err_free_master_domain: + kfree(master_domain); +err_free_vmaster: + kfree(state->vmaster); + return ret; } /* @@ -2815,6 +2982,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) lockdep_assert_held(&arm_smmu_asid_lock); + arm_smmu_attach_commit_vmaster(state); + if (state->ats_enabled && !master->ats_enabled) { arm_smmu_enable_ats(master); } else if (state->ats_enabled && master->ats_enabled) { @@ -2828,9 +2997,9 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) /* ATS is being switched off, invalidate the entire ATC */ arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); } - master->ats_enabled = state->ats_enabled; arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); + master->ats_enabled = state->ats_enabled; } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2853,16 +3022,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - mutex_lock(&smmu_domain->init_mutex); - - if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - } else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - - mutex_unlock(&smmu_domain->init_mutex); - if (ret) - return ret; + if (smmu_domain->smmu != smmu) + return -EINVAL; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); @@ -2918,16 +3079,9 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cd target_cd; - int ret = 0; - mutex_lock(&smmu_domain->init_mutex); - if (!smmu_domain->smmu) - ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); - else if (smmu_domain->smmu != smmu) - ret = -EINVAL; - mutex_unlock(&smmu_domain->init_mutex); - if (ret) - return ret; + if (smmu_domain->smmu != smmu) + return -EINVAL; if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) return -EINVAL; @@ -3016,13 +3170,12 @@ out_unlock: return ret; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old_domain) { + struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *smmu_domain; - - smmu_domain = to_smmu_domain(domain); mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); @@ -3043,6 +3196,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, sid_domain->type == IOMMU_DOMAIN_BLOCKED) sid_domain->ops->attach_dev(sid_domain, dev); } + return 0; } static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, @@ -3070,8 +3224,10 @@ static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, if (arm_smmu_ssids_in_use(&master->cd_table)) { /* * If a CD table has to be present then we need to run with ATS - * on even though the RID will fail ATS queries with UR. This is - * because we have no idea what the PASID's need. + * on because we have to assume a PASID is using ATS. For + * IDENTITY this will setup things so that S1DSS=bypass which + * follows the explanation in "13.6.4 Full ATS skipping stage 1" + * and allows for ATS on the RID to work. */ state.cd_needs_ats = true; arm_smmu_attach_prepare(&state, domain); @@ -3097,6 +3253,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_ste ste; struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_bypass_ste(master->smmu, &ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); return 0; @@ -3115,7 +3272,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_ste ste; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_abort_ste(&ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_TERMINATE); @@ -3124,6 +3283,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, static const struct iommu_domain_ops arm_smmu_blocked_ops = { .attach_dev = arm_smmu_attach_dev_blocked, + .set_dev_pasid = arm_smmu_blocking_set_dev_pasid, }; static struct iommu_domain arm_smmu_blocked_domain = { @@ -3136,6 +3296,7 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID | IOMMU_HWPT_ALLOC_NEST_PARENT; @@ -3147,25 +3308,43 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags, if (user_data) return ERR_PTR(-EOPNOTSUPP); - if (flags & IOMMU_HWPT_ALLOC_PASID) - return arm_smmu_domain_alloc_paging(dev); - smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); - if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) { - if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) { + switch (flags) { + case 0: + /* Prefer S1 if available */ + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; + break; + case IOMMU_HWPT_ALLOC_NEST_PARENT: + if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) { ret = -EOPNOTSUPP; goto err_free; } smmu_domain->stage = ARM_SMMU_DOMAIN_S2; smmu_domain->nest_parent = true; + break; + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING: + case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID: + case IOMMU_HWPT_ALLOC_PASID: + if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) { + ret = -EOPNOTSUPP; + goto err_free; + } + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -EOPNOTSUPP; + goto err_free; } smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags); if (ret) goto err_free; return &smmu_domain->domain; @@ -3237,8 +3416,8 @@ static struct platform_driver arm_smmu_driver; static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? dev_get_drvdata(dev) : NULL; } @@ -3279,6 +3458,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3289,11 +3469,21 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); - ret = -EINVAL; + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } @@ -3390,8 +3580,7 @@ static void arm_smmu_release_device(struct device *dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - if (WARN_ON(arm_smmu_master_sva_enabled(master))) - iopf_queue_remove_device(master->smmu->evtq.iopf, dev); + WARN_ON(master->iopf_refcount); /* Put the STE back to what arm_smmu_init_strtab() sets */ if (dev->iommu->require_direct) @@ -3466,58 +3655,6 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static int arm_smmu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return -ENODEV; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - if (!arm_smmu_master_iopf_supported(master)) - return -EINVAL; - if (master->iopf_enabled) - return -EBUSY; - master->iopf_enabled = true; - return 0; - case IOMMU_DEV_FEAT_SVA: - if (!arm_smmu_master_sva_supported(master)) - return -EINVAL; - if (arm_smmu_master_sva_enabled(master)) - return -EBUSY; - return arm_smmu_master_enable_sva(master); - default: - return -EINVAL; - } -} - -static int arm_smmu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return -EINVAL; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - if (!master->iopf_enabled) - return -EINVAL; - if (master->sva_enabled) - return -EBUSY; - master->iopf_enabled = false; - return 0; - case IOMMU_DEV_FEAT_SVA: - if (!arm_smmu_master_sva_enabled(master)) - return -EINVAL; - return arm_smmu_master_disable_sva(master); - default: - return -EINVAL; - } -} - /* * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the * PCIe link and save the data to memory by DMA. The hardware is restricted to @@ -3538,12 +3675,11 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .hw_info = arm_smmu_hw_info, - .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, .domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags, .probe_device = arm_smmu_probe_device, @@ -3551,14 +3687,11 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .remove_dev_pasid = arm_smmu_remove_dev_pasid, - .dev_enable_feat = arm_smmu_dev_enable_feature, - .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, - .viommu_alloc = arm_vsmmu_alloc, + .get_viommu_size = arm_smmu_get_viommu_size, + .viommu_init = arm_vsmmu_init, .user_pasid_table = 1, - .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, @@ -4239,7 +4372,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) */ if (!!(reg & IDR0_COHACC) != coherent) dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", - coherent ? "true" : "false"); + str_true_false(coherent)); switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { case IDR0_STALL_MODEL_FORCE: @@ -4322,6 +4455,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; + + if (FIELD_GET(IDR3_BBM, reg) == 2) + smmu->features |= ARM_SMMU_FEAT_BBML2; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); @@ -4370,11 +4508,6 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->oas = 48; } - if (arm_smmu_ops.pgsize_bitmap == -1UL) - arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; - else - arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; - /* Set the DMA mask for our table walker */ if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) dev_warn(smmu->dev, @@ -4568,6 +4701,7 @@ static void arm_smmu_impl_remove(void *data) static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) { struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + const struct arm_smmu_impl_ops *ops; int ret; if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) @@ -4578,11 +4712,24 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) if (IS_ERR(new_smmu)) return new_smmu; + ops = new_smmu->impl_ops; + if (ops) { + /* get_viommu_size and vsmmu_init ops must be paired */ + if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) { + ret = -EINVAL; + goto err_remove; + } + } + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, new_smmu); if (ret) return ERR_PTR(ret); return new_smmu; + +err_remove: + arm_smmu_impl_remove(new_smmu); + return ERR_PTR(ret); } static int arm_smmu_device_probe(struct platform_device *pdev) @@ -4663,7 +4810,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Initialise in-memory data structures */ ret = arm_smmu_init_structures(smmu); if (ret) - return ret; + goto err_free_iopf; /* Record our private device structure */ platform_set_drvdata(pdev, smmu); @@ -4674,22 +4821,29 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Reset the device */ ret = arm_smmu_device_reset(smmu); if (ret) - return ret; + goto err_disable; /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); if (ret) - return ret; + goto err_disable; ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev); if (ret) { dev_err(dev, "Failed to register iommu\n"); - iommu_device_sysfs_remove(&smmu->iommu); - return ret; + goto err_free_sysfs; } return 0; + +err_free_sysfs: + iommu_device_sysfs_remove(&smmu->iommu); +err_disable: + arm_smmu_device_disable(smmu); +err_free_iopf: + iopf_queue_free(smmu->evtq.iopf); + return ret; } static void arm_smmu_device_remove(struct platform_device *pdev) @@ -4729,7 +4883,7 @@ static struct platform_driver arm_smmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove_new = arm_smmu_device_remove, + .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; module_driver(arm_smmu_driver, platform_driver_register, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 0107d3f333a1..ae23aacc3840 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -16,6 +16,7 @@ #include <linux/sizes.h> struct arm_smmu_device; +struct arm_vsmmu; /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 @@ -60,6 +61,7 @@ struct arm_smmu_device; #define ARM_SMMU_IDR3 0xc #define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) +#define IDR3_BBM GENMASK(12, 11) #define ARM_SMMU_IDR5 0x14 #define IDR5_STALL_MAX GENMASK(31, 16) @@ -266,6 +268,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_MEV (1UL << 19) #define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) @@ -452,10 +455,18 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_0_ID GENMASK_ULL(7, 0) +#define EVT_ID_BAD_STREAMID_CONFIG 0x02 +#define EVT_ID_STE_FETCH_FAULT 0x03 +#define EVT_ID_BAD_STE_CONFIG 0x04 +#define EVT_ID_STREAM_DISABLED_FAULT 0x06 +#define EVT_ID_BAD_SUBSTREAMID_CONFIG 0x08 +#define EVT_ID_CD_FETCH_FAULT 0x09 +#define EVT_ID_BAD_CD_CONFIG 0x0a #define EVT_ID_TRANSLATION_FAULT 0x10 #define EVT_ID_ADDR_SIZE_FAULT 0x11 #define EVT_ID_ACCESS_FAULT 0x12 #define EVT_ID_PERMISSION_FAULT 0x13 +#define EVT_ID_VMS_FETCH_FAULT 0x25 #define EVTQ_0_SSV (1UL << 11) #define EVTQ_0_SSID GENMASK_ULL(31, 12) @@ -467,9 +478,11 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid) #define EVTQ_1_RnW (1UL << 35) #define EVTQ_1_S2 (1UL << 39) #define EVTQ_1_CLASS GENMASK_ULL(41, 40) +#define EVTQ_1_CLASS_TT 0x01 #define EVTQ_1_TT_READ (1UL << 44) #define EVTQ_2_ADDR GENMASK_ULL(63, 0) #define EVTQ_3_IPA GENMASK_ULL(51, 12) +#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3) /* PRI queue */ #define PRIQ_ENT_SZ_SHIFT 4 @@ -709,6 +722,16 @@ struct arm_smmu_impl_ops { int (*init_structures)(struct arm_smmu_device *smmu); struct arm_smmu_cmdq *(*get_secondary_cmdq)( struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); + /* + * An implementation should define its own type other than the default + * IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type + * to return its own structure. + */ + void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length, + enum iommu_hw_info_type *type); + size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type); + int (*vsmmu_init)(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data); }; /* An SMMUv3 instance */ @@ -744,6 +767,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HA (1 << 21) #define ARM_SMMU_FEAT_HD (1 << 22) #define ARM_SMMU_FEAT_S2FWB (1 << 23) +#define ARM_SMMU_FEAT_BBML2 (1 << 24) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -789,20 +813,45 @@ struct arm_smmu_stream { struct rb_node node; }; +struct arm_smmu_vmaster { + struct arm_vsmmu *vsmmu; + unsigned long vsid; +}; + +struct arm_smmu_event { + u8 stall : 1, + ssv : 1, + privileged : 1, + instruction : 1, + s2 : 1, + read : 1, + ttrnw : 1, + class_tt : 1; + u8 id; + u8 class; + u16 stag; + u32 sid; + u32 ssid; + u64 iova; + u64 ipa; + u64 fetch_addr; + struct device *dev; +}; + /* SMMU private data for each master */ struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; struct arm_smmu_stream *streams; + struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */ /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; bool ats_enabled : 1; bool ste_ats_enabled : 1; bool stall_enabled; - bool sva_enabled; - bool iopf_enabled; unsigned int ssid_bits; + unsigned int iopf_refcount; }; /* SMMU private data for an IOMMU domain */ @@ -813,7 +862,6 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; - struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; atomic_t nr_ats_masters; @@ -879,8 +927,14 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; + /* + * For nested domains the master_domain is threaded onto the S2 parent, + * this points to the IOMMU_DOMAIN_NESTED to disambiguate the masters. + */ + struct iommu_domain *domain; ioasid_t ssid; bool nested_ats_flush : 1; + bool using_iopf : 1; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -943,6 +997,7 @@ struct arm_smmu_attach_state { bool disable_ats; ioasid_t ssid; /* Resulting state */ + struct arm_smmu_vmaster *vmaster; bool ats_enabled; }; @@ -958,11 +1013,6 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); -bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); -bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); -int arm_smmu_master_enable_sva(struct arm_smmu_master *master); -int arm_smmu_master_disable_sva(struct arm_smmu_master *master); -bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm); @@ -972,31 +1022,6 @@ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return false; } -static inline bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) -{ - return false; -} - -static inline bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) -{ - return false; -} - -static inline int arm_smmu_master_enable_sva(struct arm_smmu_master *master) -{ - return -ENODEV; -} - -static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master) -{ - return -ENODEV; -} - -static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) -{ - return false; -} - static inline void arm_smmu_sva_notifier_synchronize(void) {} #define arm_smmu_sva_domain_alloc NULL @@ -1021,14 +1046,52 @@ struct arm_vsmmu { }; #if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); -struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, - struct iommu_domain *parent, - struct iommufd_ctx *ictx, - unsigned int viommu_type); +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); +size_t arm_smmu_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type); +int arm_vsmmu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain); +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt); +struct iommu_domain * +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); #else +#define arm_smmu_get_viommu_size NULL #define arm_smmu_hw_info NULL -#define arm_vsmmu_alloc NULL +#define arm_vsmmu_init NULL +#define arm_vsmmu_alloc_domain_nested NULL +#define arm_vsmmu_cache_invalidate NULL + +static inline int +arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + return 0; +} + +static inline void +arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ +} + +static inline void +arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ +} + +static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, + u64 *evt) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index c8ec74f089f3..378104cd395e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -8,7 +8,9 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/iommu.h> +#include <linux/iommufd.h> #include <linux/iopoll.h> +#include <uapi/linux/iommufd.h> #include <acpi/acpixf.h> @@ -26,8 +28,10 @@ #define CMDQV_EN BIT(0) #define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_SID_PER_VM_LOG2 GENMASK(15, 12) #define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) #define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) +#define CMDQV_VER GENMASK(3, 0) #define TEGRA241_CMDQV_STATUS 0x0008 #define CMDQV_ENABLED BIT(0) @@ -53,6 +57,9 @@ #define VINTF_STATUS GENMASK(3, 1) #define VINTF_ENABLED BIT(0) +#define TEGRA241_VINTF_SID_MATCH(s) (0x0040 + 0x4*(s)) +#define TEGRA241_VINTF_SID_REPLACE(s) (0x0080 + 0x4*(s)) + #define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ (0x00C0 + 0x8*(m)) #define LVCMDQ_ERR_MAP_NUM_64 2 @@ -79,7 +86,6 @@ #define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define VCMDQ_LOG2SIZE_MAX 19 #define TEGRA241_VCMDQ_BASE 0x00000 #define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 @@ -115,16 +121,20 @@ MODULE_PARM_DESC(bypass_vcmdq, /** * struct tegra241_vcmdq - Virtual Command Queue + * @core: Embedded iommufd_hw_queue structure * @idx: Global index in the CMDQV * @lidx: Local index in the VINTF * @enabled: Enable status * @cmdqv: Parent CMDQV pointer * @vintf: Parent VINTF pointer + * @prev: Previous LVCMDQ to depend on * @cmdq: Command Queue struct * @page0: MMIO Page0 base address * @page1: MMIO Page1 base address */ struct tegra241_vcmdq { + struct iommufd_hw_queue core; + u16 idx; u16 lidx; @@ -132,22 +142,30 @@ struct tegra241_vcmdq { struct tegra241_cmdqv *cmdqv; struct tegra241_vintf *vintf; + struct tegra241_vcmdq *prev; struct arm_smmu_cmdq cmdq; void __iomem *page0; void __iomem *page1; }; +#define hw_queue_to_vcmdq(v) container_of(v, struct tegra241_vcmdq, core) /** * struct tegra241_vintf - Virtual Interface + * @vsmmu: Embedded arm_vsmmu structure * @idx: Global index in the CMDQV * @enabled: Enable status * @hyp_own: Owned by hypervisor (in-kernel) * @cmdqv: Parent CMDQV pointer * @lvcmdqs: List of logical VCMDQ pointers + * @lvcmdq_mutex: Lock to serialize user-allocated lvcmdqs * @base: MMIO base address + * @mmap_offset: Offset argument for mmap() syscall + * @sids: Stream ID mapping resources */ struct tegra241_vintf { + struct arm_vsmmu vsmmu; + u16 idx; bool enabled; @@ -155,19 +173,41 @@ struct tegra241_vintf { struct tegra241_cmdqv *cmdqv; struct tegra241_vcmdq **lvcmdqs; + struct mutex lvcmdq_mutex; /* user space race */ void __iomem *base; + unsigned long mmap_offset; + + struct ida sids; }; +#define viommu_to_vintf(v) container_of(v, struct tegra241_vintf, vsmmu.core) + +/** + * struct tegra241_vintf_sid - Virtual Interface Stream ID Mapping + * @core: Embedded iommufd_vdevice structure, holding virtual Stream ID + * @vintf: Parent VINTF pointer + * @sid: Physical Stream ID + * @idx: Mapping index in the VINTF + */ +struct tegra241_vintf_sid { + struct iommufd_vdevice core; + struct tegra241_vintf *vintf; + u32 sid; + u8 idx; +}; +#define vdev_to_vsid(v) container_of(v, struct tegra241_vintf_sid, core) /** * struct tegra241_cmdqv - CMDQ-V for SMMUv3 * @smmu: SMMUv3 device * @dev: CMDQV device * @base: MMIO base address + * @base_phys: MMIO physical base address, for mmap * @irq: IRQ number * @num_vintfs: Total number of VINTFs * @num_vcmdqs: Total number of VCMDQs * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @num_sids_per_vintf: Total number of SID mappings per VINTF * @vintf_ids: VINTF id allocator * @vintfs: List of VINTFs */ @@ -176,12 +216,14 @@ struct tegra241_cmdqv { struct device *dev; void __iomem *base; + phys_addr_t base_phys; int irq; /* CMDQV Hardware Params */ u16 num_vintfs; u16 num_vcmdqs; u16 num_lvcmdqs_per_vintf; + u16 num_sids_per_vintf; struct ida vintf_ids; @@ -253,6 +295,22 @@ static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) /* ISR Functions */ +static void tegra241_vintf_user_handle_error(struct tegra241_vintf *vintf) +{ + struct iommufd_viommu *viommu = &vintf->vsmmu.core; + struct iommu_vevent_tegra241_cmdqv vevent_data; + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 err = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + vevent_data.lvcmdq_err_map[i] = cpu_to_le64(err); + } + + iommufd_viommu_report_event(viommu, IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV, + &vevent_data, sizeof(vevent_data)); +} + static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) { int i; @@ -298,6 +356,14 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) vintf_map &= ~BIT_ULL(0); } + /* Handle other user VINTFs and their LVCMDQs */ + while (vintf_map) { + unsigned long idx = __ffs64(vintf_map); + + tegra241_vintf_user_handle_error(cmdqv->vintfs[idx]); + vintf_map &= ~BIT_ULL(idx); + } + return IRQ_HANDLED; } @@ -339,7 +405,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, * one CPU at a time can enter the process, while the others * will be spinning at the same lock. */ - lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; vcmdq = vintf->lvcmdqs[lidx]; if (!vcmdq || !READ_ONCE(vcmdq->enabled)) return NULL; @@ -352,6 +418,30 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, /* HW Reset Functions */ +/* + * When a guest-owned VCMDQ is disabled, if the guest did not enqueue a CMD_SYNC + * following an ATC_INV command at the end of the guest queue while this ATC_INV + * is timed out, the TIMEOUT will not be reported until this VCMDQ gets assigned + * to the next VM, which will be a false alarm potentially causing some unwanted + * behavior in the new VM. Thus, a guest-owned VCMDQ must flush the TIMEOUT when + * it gets disabled. This can be done by just issuing a CMD_SYNC to SMMU CMDQ. + */ +static void tegra241_vcmdq_hw_flush_timeout(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + u64 cmd_sync[CMDQ_ENT_DWORDS] = {}; + + cmd_sync[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) | + FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); + + /* + * It does not hurt to insert another CMD_SYNC, taking advantage of the + * arm_smmu_cmdq_issue_cmdlist() that waits for the CMD_SYNC completion. + */ + arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, cmd_sync, 1, true); +} + +/* This function is for LVCMDQ, so @vcmdq must not be unmapped yet */ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) { char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); @@ -364,6 +454,8 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); } + tegra241_vcmdq_hw_flush_timeout(vcmdq); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); @@ -380,6 +472,7 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); } +/* This function is for LVCMDQ, so @vcmdq must be mapped prior */ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) { char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); @@ -405,14 +498,45 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) return 0; } +/* Unmap a global VCMDQ from the pre-assigned LVCMDQ */ +static void tegra241_vcmdq_unmap_lvcmdq(struct tegra241_vcmdq *vcmdq) +{ + u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + writel(regval & ~CMDQV_CMDQ_ALLOCATED, + REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + dev_dbg(vcmdq->cmdqv->dev, "%sunmapped\n", h); +} + static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) { - u16 lidx; + u16 lidx = vintf->cmdqv->num_lvcmdqs_per_vintf; + int sidx; - for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) - if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + /* HW requires to unmap LVCMDQs in descending order */ + while (lidx--) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vcmdq_unmap_lvcmdq(vintf->lvcmdqs[lidx]); + } + } vintf_write_config(vintf, 0); + for (sidx = 0; sidx < vintf->cmdqv->num_sids_per_vintf; sidx++) { + writel(0, REG_VINTF(vintf, SID_MATCH(sidx))); + writel(0, REG_VINTF(vintf, SID_REPLACE(sidx))); + } +} + +/* Map a global VCMDQ to the pre-assigned LVCMDQ */ +static void tegra241_vcmdq_map_lvcmdq(struct tegra241_vcmdq *vcmdq) +{ + u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + writel(regval | CMDQV_CMDQ_ALLOCATED, + REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + dev_dbg(vcmdq->cmdqv->dev, "%smapped\n", h); } static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) @@ -430,7 +554,8 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a * restricted set of supported commands. */ - regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own) | + FIELD_PREP(VINTF_VMID, vintf->vsmmu.vmid); writel(regval, REG_VINTF(vintf, CONFIG)); ret = vintf_write_config(vintf, regval | VINTF_EN); @@ -442,8 +567,10 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) */ vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); + /* HW requires to map LVCMDQs in ascending order */ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + tegra241_vcmdq_map_lvcmdq(vintf->lvcmdqs[lidx]); ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); if (ret) { tegra241_vintf_hw_deinit(vintf); @@ -477,7 +604,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); - regval |= CMDQV_CMDQ_ALLOCATED; writel_relaxed(regval, REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); } @@ -488,29 +614,21 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) /* VCMDQ Resource Helpers */ -static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) -{ - struct arm_smmu_queue *q = &vcmdq->cmdq.q; - size_t nents = 1 << q->llq.max_n_shift; - size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; - - if (!q->base) - return; - dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); -} - static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) { struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; struct arm_smmu_queue *q = &cmdq->q; char name[16]; + u32 regval; int ret; snprintf(name, 16, "vcmdq%u", vcmdq->idx); - /* Queue size, capped to ensure natural alignment */ - q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); + /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, @@ -558,12 +676,15 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; char header[64]; - tegra241_vcmdq_free_smmu_cmdq(vcmdq); + /* Note that the lvcmdq queue memory space is managed by devres */ + tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); - kfree(vcmdq); + /* Guest-owned VCMDQ is free-ed with hw_queue by iommufd core */ + if (vcmdq->vintf->hyp_own) + kfree(vcmdq); } static struct tegra241_vcmdq * @@ -636,28 +757,27 @@ static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, /* Remove Helpers */ -static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) -{ - tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); - tegra241_vintf_free_lvcmdq(vintf, lidx); -} - static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) { struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; u16 lidx; + tegra241_vintf_hw_deinit(vintf); + /* Remove LVCMDQ resources */ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) if (vintf->lvcmdqs[lidx]) - tegra241_vintf_remove_lvcmdq(vintf, lidx); - - /* Remove VINTF resources */ - tegra241_vintf_hw_deinit(vintf); + tegra241_vintf_free_lvcmdq(vintf, lidx); dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); tegra241_cmdqv_deinit_vintf(cmdqv, idx); - kfree(vintf); + if (!vintf->hyp_own) { + mutex_destroy(&vintf->lvcmdq_mutex); + ida_destroy(&vintf->sids); + /* Guest-owned VINTF is free-ed with viommu by iommufd core */ + } else { + kfree(vintf); + } } static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) @@ -685,10 +805,51 @@ static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) put_device(cmdqv->dev); /* smmu->impl_dev */ } +static int +tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data); + +static void *tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *length, + enum iommu_hw_info_type *type) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct iommu_hw_info_tegra241_cmdqv *info; + u32 regval; + + if (*type != IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) + return ERR_PTR(-EOPNOTSUPP); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + info->log2vcmdqs = ilog2(cmdqv->num_lvcmdqs_per_vintf); + info->log2vsids = ilog2(cmdqv->num_sids_per_vintf); + info->version = FIELD_GET(CMDQV_VER, regval); + + *length = sizeof(*info); + *type = IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV; + return info; +} + +static size_t tegra241_cmdqv_get_vintf_size(enum iommu_viommu_type viommu_type) +{ + if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) + return 0; + return VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core); +} + static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + /* For in-kernel use */ .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, .device_reset = tegra241_cmdqv_hw_reset, .device_remove = tegra241_cmdqv_remove, + /* For user-space use */ + .hw_info = tegra241_cmdqv_hw_info, + .get_viommu_size = tegra241_cmdqv_get_vintf_size, + .vsmmu_init = tegra241_cmdqv_init_vintf_user, }; /* Probe Functions */ @@ -766,13 +927,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); if (!vintf) - goto out_fallback; + return -ENOMEM; /* Init VINTF0 for in-kernel use */ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); if (ret) { dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; + return ret; } /* Preallocate logical VCMDQs to VINTF0 */ @@ -781,24 +942,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); if (IS_ERR(vcmdq)) - goto free_lvcmdq; + return PTR_ERR(vcmdq); } /* Now, we are ready to run all the impl ops */ smmu->impl_ops = &tegra241_cmdqv_impl_ops; return 0; - -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -out_fallback: - dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); - smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; - tegra241_cmdqv_remove(smmu); - return 0; } #ifdef CONFIG_IOMMU_DEBUGFS @@ -842,10 +991,12 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, cmdqv->irq = irq; cmdqv->base = base; cmdqv->dev = smmu->impl_dev; + cmdqv->base_phys = res->start; if (cmdqv->irq > 0) { - ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", - cmdqv); + ret = request_threaded_irq(irq, NULL, tegra241_cmdqv_isr, + IRQF_ONESHOT, "tegra241-cmdqv", + cmdqv); if (ret) { dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", cmdqv->irq, ret); @@ -857,6 +1008,8 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + cmdqv->num_sids_per_vintf = + 1 << FIELD_GET(CMDQV_NUM_SID_PER_VM_LOG2, regval); cmdqv->vintfs = kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); @@ -910,3 +1063,287 @@ out_fallback: put_device(smmu->impl_dev); return ERR_PTR(-ENODEV); } + +/* User space VINTF and VCMDQ Functions */ + +static size_t tegra241_vintf_get_vcmdq_size(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type) +{ + if (queue_type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV) + return 0; + return HW_QUEUE_STRUCT_SIZE(struct tegra241_vcmdq, core); +} + +static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq) +{ + char header[64]; + + /* Configure the vcmdq only; User space does the enabling */ + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + + dev_dbg(vcmdq->cmdqv->dev, "%sinited at host PA 0x%llx size 0x%lx\n", + lvcmdq_error_header(vcmdq, header, 64), + vcmdq->cmdq.q.q_base & VCMDQ_ADDR, + 1UL << (vcmdq->cmdq.q.q_base & VCMDQ_LOG2SIZE)); + return 0; +} + +static void +tegra241_vintf_destroy_lvcmdq_user(struct iommufd_hw_queue *hw_queue) +{ + struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue); + + mutex_lock(&vcmdq->vintf->lvcmdq_mutex); + tegra241_vcmdq_hw_deinit(vcmdq); + tegra241_vcmdq_unmap_lvcmdq(vcmdq); + tegra241_vintf_free_lvcmdq(vcmdq->vintf, vcmdq->lidx); + if (vcmdq->prev) + iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core); + mutex_unlock(&vcmdq->vintf->lvcmdq_mutex); +} + +static int tegra241_vintf_alloc_lvcmdq_user(struct iommufd_hw_queue *hw_queue, + u32 lidx, phys_addr_t base_addr_pa) +{ + struct tegra241_vintf *vintf = viommu_to_vintf(hw_queue->viommu); + struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue); + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct arm_smmu_device *smmu = &cmdqv->smmu; + struct tegra241_vcmdq *prev = NULL; + u32 log2size, max_n_shift; + char header[64]; + int ret; + + if (hw_queue->type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV) + return -EOPNOTSUPP; + if (lidx >= cmdqv->num_lvcmdqs_per_vintf) + return -EINVAL; + + mutex_lock(&vintf->lvcmdq_mutex); + + if (vintf->lvcmdqs[lidx]) { + ret = -EEXIST; + goto unlock; + } + + /* + * HW requires to map LVCMDQs in ascending order, so reject if the + * previous lvcmdqs is not allocated yet. + */ + if (lidx) { + prev = vintf->lvcmdqs[lidx - 1]; + if (!prev) { + ret = -EIO; + goto unlock; + } + } + + /* + * hw_queue->length must be a power of 2, in range of + * [ 32, 2 ^ (idr[1].CMDQS + CMDQ_ENT_SZ_SHIFT) ] + */ + max_n_shift = FIELD_GET(IDR1_CMDQS, + readl_relaxed(smmu->base + ARM_SMMU_IDR1)); + if (!is_power_of_2(hw_queue->length) || hw_queue->length < 32 || + hw_queue->length > (1 << (max_n_shift + CMDQ_ENT_SZ_SHIFT))) { + ret = -EINVAL; + goto unlock; + } + log2size = ilog2(hw_queue->length) - CMDQ_ENT_SZ_SHIFT; + + /* base_addr_pa must be aligned to hw_queue->length */ + if (base_addr_pa & ~VCMDQ_ADDR || + base_addr_pa & (hw_queue->length - 1)) { + ret = -EINVAL; + goto unlock; + } + + /* + * HW requires to unmap LVCMDQs in descending order, so destroy() must + * follow this rule. Set a dependency on its previous LVCMDQ so iommufd + * core will help enforce it. + */ + if (prev) { + ret = iommufd_hw_queue_depend(vcmdq, prev, core); + if (ret) + goto unlock; + } + vcmdq->prev = prev; + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto undepend_vcmdq; + + dev_dbg(cmdqv->dev, "%sallocated\n", + lvcmdq_error_header(vcmdq, header, 64)); + + tegra241_vcmdq_map_lvcmdq(vcmdq); + + vcmdq->cmdq.q.q_base = base_addr_pa & VCMDQ_ADDR; + vcmdq->cmdq.q.q_base |= log2size; + + ret = tegra241_vcmdq_hw_init_user(vcmdq); + if (ret) + goto unmap_lvcmdq; + + hw_queue->destroy = &tegra241_vintf_destroy_lvcmdq_user; + mutex_unlock(&vintf->lvcmdq_mutex); + return 0; + +unmap_lvcmdq: + tegra241_vcmdq_unmap_lvcmdq(vcmdq); + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +undepend_vcmdq: + if (vcmdq->prev) + iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core); +unlock: + mutex_unlock(&vintf->lvcmdq_mutex); + return ret; +} + +static void tegra241_cmdqv_destroy_vintf_user(struct iommufd_viommu *viommu) +{ + struct tegra241_vintf *vintf = viommu_to_vintf(viommu); + + if (vintf->mmap_offset) + iommufd_viommu_destroy_mmap(&vintf->vsmmu.core, + vintf->mmap_offset); + tegra241_cmdqv_remove_vintf(vintf->cmdqv, vintf->idx); +} + +static void tegra241_vintf_destroy_vsid(struct iommufd_vdevice *vdev) +{ + struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev); + struct tegra241_vintf *vintf = vsid->vintf; + + writel(0, REG_VINTF(vintf, SID_MATCH(vsid->idx))); + writel(0, REG_VINTF(vintf, SID_REPLACE(vsid->idx))); + ida_free(&vintf->sids, vsid->idx); + dev_dbg(vintf->cmdqv->dev, + "VINTF%u: deallocated SID_REPLACE%d for pSID=%x\n", vintf->idx, + vsid->idx, vsid->sid); +} + +static int tegra241_vintf_init_vsid(struct iommufd_vdevice *vdev) +{ + struct device *dev = iommufd_vdevice_to_device(vdev); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct tegra241_vintf *vintf = viommu_to_vintf(vdev->viommu); + struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev); + struct arm_smmu_stream *stream = &master->streams[0]; + u64 virt_sid = vdev->virt_id; + int sidx; + + if (virt_sid > UINT_MAX) + return -EINVAL; + + WARN_ON_ONCE(master->num_streams != 1); + + /* Find an empty pair of SID_REPLACE and SID_MATCH */ + sidx = ida_alloc_max(&vintf->sids, vintf->cmdqv->num_sids_per_vintf - 1, + GFP_KERNEL); + if (sidx < 0) + return sidx; + + writel(stream->id, REG_VINTF(vintf, SID_REPLACE(sidx))); + writel(virt_sid << 1 | 0x1, REG_VINTF(vintf, SID_MATCH(sidx))); + dev_dbg(vintf->cmdqv->dev, + "VINTF%u: allocated SID_REPLACE%d for pSID=%x, vSID=%x\n", + vintf->idx, sidx, stream->id, (u32)virt_sid); + + vsid->idx = sidx; + vsid->vintf = vintf; + vsid->sid = stream->id; + + vdev->destroy = &tegra241_vintf_destroy_vsid; + return 0; +} + +static struct iommufd_viommu_ops tegra241_cmdqv_viommu_ops = { + .destroy = tegra241_cmdqv_destroy_vintf_user, + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, + /* Non-accelerated commands will be still handled by the kernel */ + .cache_invalidate = arm_vsmmu_cache_invalidate, + .vdevice_size = VDEVICE_STRUCT_SIZE(struct tegra241_vintf_sid, core), + .vdevice_init = tegra241_vintf_init_vsid, + .get_hw_queue_size = tegra241_vintf_get_vcmdq_size, + .hw_queue_init_phys = tegra241_vintf_alloc_lvcmdq_user, +}; + +static int +tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data) +{ + struct tegra241_cmdqv *cmdqv = + container_of(vsmmu->smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = viommu_to_vintf(&vsmmu->core); + struct iommu_viommu_tegra241_cmdqv data; + phys_addr_t page0_base; + int ret; + + /* + * Unsupported type should be rejected by tegra241_cmdqv_get_vintf_size. + * Seeing one here indicates a kernel bug or some data corruption. + */ + if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)) + return -EOPNOTSUPP; + + if (!user_data) + return -EINVAL; + + ret = iommu_copy_struct_from_user(&data, user_data, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV, + out_vintf_mmap_length); + if (ret) + return ret; + + ret = tegra241_cmdqv_init_vintf(cmdqv, cmdqv->num_vintfs - 1, vintf); + if (ret < 0) { + dev_err(cmdqv->dev, "no more available vintf\n"); + return ret; + } + + /* + * Initialize the user-owned VINTF without a LVCMDQ, as it cannot pre- + * allocate a LVCMDQ until user space wants one, for security reasons. + * It is different than the kernel-owned VINTF0, which had pre-assigned + * and pre-allocated global VCMDQs that would be mapped to the LVCMDQs + * by the tegra241_vintf_hw_init() call. + */ + ret = tegra241_vintf_hw_init(vintf, false); + if (ret) + goto deinit_vintf; + + page0_base = cmdqv->base_phys + TEGRA241_VINTFi_PAGE0(vintf->idx); + ret = iommufd_viommu_alloc_mmap(&vintf->vsmmu.core, page0_base, SZ_64K, + &vintf->mmap_offset); + if (ret) + goto hw_deinit_vintf; + + data.out_vintf_mmap_length = SZ_64K; + data.out_vintf_mmap_offset = vintf->mmap_offset; + ret = iommu_copy_struct_to_user(user_data, &data, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV, + out_vintf_mmap_length); + if (ret) + goto free_mmap; + + ida_init(&vintf->sids); + mutex_init(&vintf->lvcmdq_mutex); + + dev_dbg(cmdqv->dev, "VINTF%u: allocated with vmid (%d)\n", vintf->idx, + vintf->vsmmu.vmid); + + vsmmu->core.ops = &tegra241_cmdqv_viommu_ops; + return 0; + +free_mmap: + iommufd_viommu_destroy_mmap(&vintf->vsmmu.core, vintf->mmap_offset); +hw_deinit_vintf: + tegra241_vintf_hw_deinit(vintf); +deinit_vintf: + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); + return ret; +} + +MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 99030e6b16e7..db9b9a8e139c 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -110,7 +110,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm int arm_mmu500_reset(struct arm_smmu_device *smmu) { u32 reg, major; - int i; /* * On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before * writes to the context bank ACTLRs will stick. And we just hope that @@ -128,11 +127,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg); +#ifdef CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA /* * Disable MMU-500's not-particularly-beneficial next-page * prefetcher for the sake of at least 5 known errata. */ - for (i = 0; i < smmu->num_context_banks; ++i) { + for (int i = 0; i < smmu->num_context_banks; ++i) { reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR); reg &= ~ARM_MMU500_ACTLR_CPRE; arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg); @@ -140,6 +140,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) if (reg & ARM_MMU500_ACTLR_CPRE) dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n"); } +#endif return 0; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 548783f3f8e8..65e0ef6539fe 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -73,7 +73,7 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) if (__ratelimit(&rs)) { dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n"); - cfg = qsmmu->cfg; + cfg = qsmmu->data->cfg; if (!cfg) return; @@ -406,6 +406,12 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) arm_smmu_print_context_fault_info(smmu, idx, &cfi); arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); + + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) { + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, + ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE); + } + return IRQ_HANDLED; } @@ -416,6 +422,9 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) if (!tmp || tmp == -EBUSY) { ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; + } else if (tmp == -EAGAIN) { + ret = IRQ_HANDLED; + resume = 0; } else { phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 6372f3e25c4b..57c097e87613 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -16,6 +16,40 @@ #define QCOM_DUMMY_VAL -1 +/* + * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the + * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch + * buffer). The remaining bits are implementation defined and vary across + * SoCs. + */ + +#define CPRE (1 << 1) +#define CMTLB (1 << 0) +#define PREFETCH_SHIFT 8 +#define PREFETCH_DEFAULT 0 +#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT) +#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT) +#define PREFETCH_DEEP (3 << PREFETCH_SHIFT) +#define GFX_ACTLR_PRR (1 << 5) + +static const struct of_device_id qcom_smmu_actlr_client_of_match[] = { + { .compatible = "qcom,adreno", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-gmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,adreno-smmu", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,fastrpc", + .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-mdss", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sc7280-venus", + .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) }, + { .compatible = "qcom,sm8550-mdss", + .data = (const void *) (PREFETCH_DEFAULT | CMTLB) }, + { } +}; + static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu) { return container_of(smmu, struct qcom_smmu, smmu); @@ -78,25 +112,80 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) { struct arm_smmu_domain *smmu_domain = (void *)cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + u32 mask = BIT(cfg->cbndx); + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; + unsigned long flags; if (enabled) - qsmmu->stall_enabled |= BIT(cfg->cbndx); + qsmmu->stall_enabled |= mask; else - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); + qsmmu->stall_enabled &= ~mask; + + /* + * If the device is on and we changed the setting, update the register. + * The spec pseudocode says that CFCFG is resampled after a fault, and + * we believe that no implementations cache it in the TLB, so it should + * be safe to change it without a TLB invalidation. + */ + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { + u32 reg; + + spin_lock_irqsave(&smmu_domain->cb_lock, flags); + reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); + + if (enabled) + reg |= ARM_SMMU_SCTLR_CFCFG; + else + reg &= ~ARM_SMMU_SCTLR_CFCFG; + + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); + + pm_runtime_put_autosuspend(smmu->dev); + } } -static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate) +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set) { struct arm_smmu_domain *smmu_domain = (void *)cookie; - struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; u32 reg = 0; + int ret; + + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } + + reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR); + reg &= ~GFX_ACTLR_PRR; + if (set) + reg |= FIELD_PREP(GFX_ACTLR_PRR, 1); + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg); + pm_runtime_put_autosuspend(smmu->dev); +} + +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr) +{ + struct arm_smmu_domain *smmu_domain = (void *)cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + int ret; - if (terminate) - reg |= ARM_SMMU_RESUME_TERMINATE; + ret = pm_runtime_resume_and_get(smmu->dev); + if (ret < 0) { + dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret); + return; + } - arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg); + writel_relaxed(lower_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR); + writel_relaxed(upper_32_bits(page_addr), + smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR); + pm_runtime_put_autosuspend(smmu->dev); } #define QCOM_ADRENO_SMMU_GPU_SID 0 @@ -207,13 +296,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu) return true; } +static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx, + const struct of_device_id *client_match) +{ + const struct of_device_id *match = + of_match_device(client_match, dev); + + if (!match) { + dev_dbg(dev, "no ACTLR settings present\n"); + return; + } + + arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data); +} + static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + const struct device_node *np = smmu_domain->smmu->dev->of_node; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; struct adreno_smmu_priv *priv; smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + /* Only enable split pagetables for the GPU device (SID 0) */ if (!qcom_adreno_smmu_is_gpu_device(dev)) return 0; @@ -238,7 +351,15 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg; priv->get_fault_info = qcom_adreno_smmu_get_fault_info; priv->set_stall = qcom_adreno_smmu_set_stall; - priv->resume_translation = qcom_adreno_smmu_resume_translation; + priv->set_prr_bit = NULL; + priv->set_prr_addr = NULL; + + if (of_device_is_compatible(np, "qcom,smmu-500") && + !of_device_is_compatible(np, "qcom,sm8250-smmu-500") && + of_device_is_compatible(np, "qcom,adreno-smmu")) { + priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit; + priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr; + } return 0; } @@ -249,6 +370,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdp4" }, { .compatible = "qcom,mdss" }, { .compatible = "qcom,qcm2290-mdss" }, + { .compatible = "qcom,sar2130p-mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, { .compatible = "qcom,sc7280-mdss" }, @@ -258,6 +380,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6115-mdss" }, { .compatible = "qcom,sm6350-mdss" }, { .compatible = "qcom,sm6375-mdss" }, { .compatible = "qcom,sm8150-mdss" }, @@ -269,8 +392,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain, struct io_pgtable_cfg *pgtbl_cfg, struct device *dev) { + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + const struct of_device_id *client_match; + int cbndx = smmu_domain->cfg.cbndx; + smmu_domain->cfg.flush_walk_prefer_tlbiasid = true; + client_match = qsmmu->data->client_match; + + if (client_match) + qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match); + return 0; } @@ -468,6 +601,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, + .context_fault_needs_threaded_irq = true, }; static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { @@ -477,6 +611,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, + .context_fault_needs_threaded_irq = true, }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, @@ -507,7 +642,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, return ERR_PTR(-ENOMEM); qsmmu->smmu.impl = impl; - qsmmu->cfg = data->cfg; + qsmmu->data = data; return &qsmmu->smmu; } @@ -550,6 +685,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = { .impl = &qcom_smmu_500_impl, .adreno_impl = &qcom_adreno_smmu_500_impl, .cfg = &qcom_smmu_impl0_cfg, + .client_match = qcom_smmu_actlr_client_of_match, }; /* @@ -567,6 +703,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data }, + { .compatible = "qcom,sdm670-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data }, { .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data}, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 3c134d1a6277..8addd453f5f1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -8,7 +8,7 @@ struct qcom_smmu { struct arm_smmu_device smmu; - const struct qcom_smmu_config *cfg; + const struct qcom_smmu_match_data *data; bool bypass_quirk; u8 bypass_cbndx; u32 stall_enabled; @@ -28,6 +28,7 @@ struct qcom_smmu_match_data { const struct qcom_smmu_config *cfg; const struct arm_smmu_impl *impl; const struct arm_smmu_impl *adreno_impl; + const struct of_device_id * const client_match; }; irqreturn_t qcom_smmu_context_fault(int irq, void *dev); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index ade4684c14c9..4ced4b5bee4d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -34,6 +34,7 @@ #include <linux/pm_runtime.h> #include <linux/ratelimit.h> #include <linux/slab.h> +#include <linux/string_choices.h> #include <linux/fsl/mc.h> @@ -78,8 +79,11 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) { - if (pm_runtime_enabled(smmu->dev)) - pm_runtime_put_autosuspend(smmu->dev); + if (pm_runtime_enabled(smmu->dev)) { + pm_runtime_mark_last_busy(smmu->dev); + __pm_runtime_put_autosuspend(smmu->dev); + + } } static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) @@ -105,7 +109,7 @@ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) } static struct platform_driver arm_smmu_driver; -static struct iommu_ops arm_smmu_ops; +static const struct iommu_ops arm_smmu_ops; #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS static struct device_node *dev_get_dev_node(struct device *dev) @@ -470,6 +474,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) arm_smmu_print_context_fault_info(smmu, idx, &cfi); arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); + + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) { + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, + ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE); + } + return IRQ_HANDLED; } @@ -909,6 +919,8 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = cfg->smmu; /* * Allocate the domain and initialise some of its data structures. @@ -921,6 +933,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); + smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap; return &smmu_domain->domain; } @@ -1194,7 +1207,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Looks ok, so add the device to the domain */ arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, smmu_domain->cfg.cbndx, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); rpm_put: arm_smmu_rpm_put(smmu); return ret; @@ -1217,7 +1229,6 @@ static int arm_smmu_attach_dev_type(struct device *dev, return ret; arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); arm_smmu_rpm_put(smmu); return 0; } @@ -1411,8 +1422,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) static struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode) { - struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver, - fwnode); + struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); + put_device(dev); return dev ? dev_get_drvdata(dev) : NULL; } @@ -1437,17 +1448,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) goto out_free; } else { smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - - /* - * Defer probe if the relevant SMMU instance hasn't finished - * probing yet. This is a fragile hack and we'd ideally - * avoid this race in the core code. Until that's ironed - * out, however, this is the most pragmatic option on the - * table. - */ - if (!smmu) - return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER, - "smmu dev has not bound yet\n")); } ret = -EINVAL; @@ -1496,7 +1496,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) out_cfg_free: kfree(cfg); out_free: - iommu_fwspec_free(dev); return ERR_PTR(ret); } @@ -1631,7 +1630,7 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, @@ -1643,7 +1642,6 @@ static struct iommu_ops arm_smmu_ops = { .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .def_domain_type = arm_smmu_def_domain_type, - .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, @@ -1923,10 +1921,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K) smmu->pgsize_bitmap |= SZ_64K | SZ_512M; - if (arm_smmu_ops.pgsize_bitmap == -1UL) - arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; - else - arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", smmu->pgsize_bitmap); @@ -2117,7 +2111,7 @@ static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu) } dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt, - cnt == 1 ? "" : "s"); + str_plural(cnt)); iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } @@ -2227,29 +2221,26 @@ static int arm_smmu_device_probe(struct platform_device *pdev) i, irq); } + platform_set_drvdata(pdev, smmu); + + /* Check for RMRs and install bypass SMRs if any */ + arm_smmu_rmr_install_bypass_smr(smmu); + + arm_smmu_device_reset(smmu); + arm_smmu_test_smr_masks(smmu); + err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL, "smmu.%pa", &smmu->ioaddr); - if (err) { - dev_err(dev, "Failed to register iommu in sysfs\n"); - return err; - } + if (err) + return dev_err_probe(dev, err, "Failed to register iommu in sysfs\n"); err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, using_legacy_binding ? NULL : dev); if (err) { - dev_err(dev, "Failed to register iommu\n"); iommu_device_sysfs_remove(&smmu->iommu); - return err; + return dev_err_probe(dev, err, "Failed to register iommu\n"); } - platform_set_drvdata(pdev, smmu); - - /* Check for RMRs and install bypass SMRs if any */ - arm_smmu_rmr_install_bypass_smr(smmu); - - arm_smmu_device_reset(smmu); - arm_smmu_test_smr_masks(smmu); - /* * We want to avoid touching dev->power.lock in fastpaths unless * it's really going to do something useful - pm_runtime_enabled() @@ -2259,6 +2250,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (dev->pm_domain) { pm_runtime_set_active(dev); pm_runtime_enable(dev); + arm_smmu_rpm_use_autosuspend(smmu); } return 0; @@ -2367,7 +2359,7 @@ static struct platform_driver arm_smmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, - .remove_new = arm_smmu_device_remove, + .remove = arm_smmu_device_remove, .shutdown = arm_smmu_device_shutdown, }; module_platform_driver(arm_smmu_driver); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index e2aeb511ae90..2dbf3243b5ad 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 +#define ARM_SMMU_GFX_PRR_CFG_LADDR 0x6008 +#define ARM_SMMU_GFX_PRR_CFG_UADDR 0x600C #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_RESUME_TERMINATE BIT(0) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index b98a7a598b89..c5be95e56031 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -229,7 +229,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, goto out_unlock; pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, + .pgsize_bitmap = domain->pgsize_bitmap, .ias = 32, .oas = 40, .tlb = &qcom_flush_ops, @@ -246,8 +246,6 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, goto out_clear_iommu; } - /* Update the domain's page sizes to reflect the page table format */ - domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1ULL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; @@ -337,6 +335,7 @@ static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev) mutex_init(&qcom_domain->init_mutex); spin_lock_init(&qcom_domain->pgtbl_lock); + qcom_domain->domain.pgsize_bitmap = SZ_4K; return &qcom_domain->domain; } @@ -598,7 +597,6 @@ static const struct iommu_ops qcom_iommu_ops = { .probe_device = qcom_iommu_probe_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, - .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, .map_pages = qcom_iommu_map, @@ -759,7 +757,7 @@ static struct platform_driver qcom_iommu_ctx_driver = { .of_match_table = ctx_of_match, }, .probe = qcom_iommu_ctx_probe, - .remove_new = qcom_iommu_ctx_remove, + .remove = qcom_iommu_ctx_remove, }; static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) @@ -931,7 +929,7 @@ static struct platform_driver qcom_iommu_driver = { .pm = &qcom_iommu_pm_ops, }, .probe = qcom_iommu_device_probe, - .remove_new = qcom_iommu_device_remove, + .remove = qcom_iommu_device_remove, }; static int __init qcom_iommu_init(void) |