diff options
Diffstat (limited to 'drivers/iommu')
67 files changed, 4729 insertions, 2685 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ec1b5e32b972..0a33d995d15d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -154,7 +154,6 @@ config IOMMU_DMA select DMA_OPS_HELPERS select IOMMU_API select IOMMU_IOVA - select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH select NEED_SG_DMA_FLAGS if SWIOTLB @@ -193,6 +192,7 @@ config MSM_IOMMU If unsure, say N here. source "drivers/iommu/amd/Kconfig" +source "drivers/iommu/arm/Kconfig" source "drivers/iommu/intel/Kconfig" source "drivers/iommu/iommufd/Kconfig" source "drivers/iommu/riscv/Kconfig" @@ -200,7 +200,6 @@ source "drivers/iommu/riscv/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or @@ -315,150 +314,6 @@ config APPLE_DART Say Y here if you are using an Apple SoC. -# ARM IOMMU support -config ARM_SMMU - tristate "ARM Ltd. System MMU (SMMU) Support" - depends on ARM64 || ARM || COMPILE_TEST - depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE - select IOMMU_API - select IOMMU_IO_PGTABLE_LPAE - select ARM_DMA_USE_IOMMU if ARM - help - Support for implementations of the ARM System MMU architecture - versions 1 and 2. - - Say Y here if your SoC includes an IOMMU device implementing - the ARM SMMU architecture. - -config ARM_SMMU_LEGACY_DT_BINDINGS - bool "Support the legacy \"mmu-masters\" devicetree bindings" - depends on ARM_SMMU=y && OF - help - Support for the badly designed and deprecated "mmu-masters" - devicetree bindings. This allows some DMA masters to attach - to the SMMU but does not provide any support via the DMA API. - If you're lucky, you might be able to get VFIO up and running. - - If you say Y here then you'll make me very sad. Instead, say N - and move your firmware to the utopian future that was 2016. - -config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT - bool "Default to disabling bypass on ARM SMMU v1 and v2" - depends on ARM_SMMU - default y - help - Say Y here to (by default) disable bypass streams such that - incoming transactions from devices that are not attached to - an iommu domain will report an abort back to the device and - will not be allowed to pass through the SMMU. - - Any old kernels that existed before this KConfig was - introduced would default to _allowing_ bypass (AKA the - equivalent of NO for this config). However the default for - this option is YES because the old behavior is insecure. - - There are few reasons to allow unmatched stream bypass, and - even fewer good ones. If saying YES here breaks your board - you should work on fixing your board. This KConfig option - is expected to be removed in the future and we'll simply - hardcode the bypass disable in the code. - - NOTE: the kernel command line parameter - 'arm-smmu.disable_bypass' will continue to override this - config. - -config ARM_SMMU_MMU_500_CPRE_ERRATA - bool "Enable errata workaround for CPRE in SMMU reset path" - depends on ARM_SMMU - default y - help - Say Y here (by default) to apply workaround to disable - MMU-500's next-page prefetcher for sake of 4 known errata. - - Say N here only when it is sure that any errata related to - prefetch enablement are not applicable on the platform. - Refer silicon-errata.rst for info on errata IDs. - -config ARM_SMMU_QCOM - def_tristate y - depends on ARM_SMMU && ARCH_QCOM - select QCOM_SCM - help - When running on a Qualcomm platform that has the custom variant - of the ARM SMMU, this needs to be built into the SMMU driver. - -config ARM_SMMU_QCOM_DEBUG - bool "ARM SMMU QCOM implementation defined debug support" - depends on ARM_SMMU_QCOM=y - help - Support for implementation specific debug features in ARM SMMU - hardware found in QTI platforms. This include support for - the Translation Buffer Units (TBU) that can be used to obtain - additional information when debugging memory management issues - like context faults. - - Say Y here to enable debug for issues such as context faults - or TLB sync timeouts which requires implementation defined - register dumps. - -config ARM_SMMU_V3 - tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support" - depends on ARM64 - select IOMMU_API - select IOMMU_IO_PGTABLE_LPAE - select GENERIC_MSI_IRQ - select IOMMUFD_DRIVER if IOMMUFD - help - Support for implementations of the ARM System MMU architecture - version 3 providing translation support to a PCIe root complex. - - Say Y here if your system includes an IOMMU device implementing - the ARM SMMUv3 architecture. - -if ARM_SMMU_V3 -config ARM_SMMU_V3_SVA - bool "Shared Virtual Addressing support for the ARM SMMUv3" - select IOMMU_SVA - select IOMMU_IOPF - select MMU_NOTIFIER - help - Support for sharing process address spaces with devices using the - SMMUv3. - - Say Y here if your system supports SVA extensions such as PCIe PASID - and PRI. - -config ARM_SMMU_V3_IOMMUFD - bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)" - depends on IOMMUFD - help - Support for IOMMUFD features intended to support virtual machines - with accelerated virtual IOMMUs. - - Say Y here if you are doing development and testing on this feature. - -config ARM_SMMU_V3_KUNIT_TEST - tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS - depends on KUNIT - depends on ARM_SMMU_V3_SVA - default KUNIT_ALL_TESTS - help - Enable this option to unit-test arm-smmu-v3 driver functions. - - If unsure, say N. - -config TEGRA241_CMDQV - bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" - depends on ACPI - help - Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The - CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues - support, except with virtualization capabilities. - - Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same - CMDQ-V extension. -endif - config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI @@ -483,8 +338,7 @@ config MTK_IOMMU config MTK_IOMMU_V1 tristate "MediaTek IOMMU Version 1 (M4U gen1) Support" - depends on ARM - depends on ARCH_MEDIATEK || COMPILE_TEST + depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API select MEMORY @@ -496,18 +350,6 @@ config MTK_IOMMU_V1 if unsure, say N here. -config QCOM_IOMMU - # Note: iommu drivers cannot (yet?) be built as modules - bool "Qualcomm IOMMU Support" - depends on ARCH_QCOM || COMPILE_TEST - depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE - select QCOM_SCM - select IOMMU_API - select IOMMU_IO_PGTABLE_LPAE - select ARM_DMA_USE_IOMMU - help - Support for IOMMU on certain Qualcomm SoCs. - config HYPERV_IOMMU bool "Hyper-V IRQ Handling" depends on HYPERV && X86 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 5e5a83c6c2aa..355294fa9033 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += amd/ intel/ arm/ iommufd/ riscv/ +obj-y += arm/ iommufd/ +obj-$(CONFIG_AMD_IOMMU) += amd/ +obj-$(CONFIG_INTEL_IOMMU) += intel/ +obj-$(CONFIG_RISCV_IOMMU) += riscv/ obj-$(CONFIG_IOMMU_API) += iommu.o +obj-$(CONFIG_IOMMU_SUPPORT) += iommu-pages.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 9de33b2d42f5..59c04a67f398 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o +obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e3bf27da1339..29a8864381c3 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -47,7 +47,6 @@ extern unsigned long amd_iommu_pgsize_bitmap; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); struct protection_domain *protection_domain_alloc(void); -void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); void amd_iommu_domain_free(struct iommu_domain *dom); @@ -148,6 +147,8 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } +bool amd_iommu_ht_range_ignore(void); + /* * This must be called after device probe completes. During probe * use rlookup_amd_iommu() get the iommu. diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 23caea22f8dc..ccbab3a4811a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -29,8 +29,6 @@ * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 -#define ALIAS_TABLE_ENTRY_SIZE 2 -#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) /* Capability offsets used by the driver */ #define MMIO_CAP_HDR_OFFSET 0x00 @@ -111,6 +109,11 @@ #define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5) #define FEATURE_SNPAVICSUP_GAM(x) \ (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1) +#define FEATURE_HT_RANGE_IGNORE BIT_ULL(11) + +#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8) +#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \ + (FIELD_GET(FEATURE_NUM_INT_REMAP_SUP, x) == 0x1) /* Note: * The current driver only support 16-bit PASID. @@ -175,13 +178,16 @@ #define CONTROL_GAM_EN 25 #define CONTROL_GALOG_EN 28 #define CONTROL_GAINT_EN 29 +#define CONTROL_NUM_INT_REMAP_MODE 43 +#define CONTROL_NUM_INT_REMAP_MODE_MASK 0x03 +#define CONTROL_NUM_INT_REMAP_MODE_2K 0x01 #define CONTROL_EPH_EN 45 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 #define CONTROL_IRTCACHEDIS 59 #define CONTROL_SNPAVIC_EN 61 -#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) +#define CTRL_INV_TO_MASK 7 #define CTRL_INV_TO_NONE 0 #define CTRL_INV_TO_1MS 1 #define CTRL_INV_TO_10MS 2 @@ -309,15 +315,14 @@ #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) #define DTE_IRQ_REMAP_ENABLE 1ULL -/* - * AMD IOMMU hardware only support 512 IRTEs despite - * the architectural limitation of 2048 entries. - */ #define DTE_INTTAB_ALIGNMENT 128 -#define DTE_INTTABLEN_VALUE 9ULL -#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1) #define DTE_INTTABLEN_MASK (0xfULL << 1) -#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE) +#define DTE_INTTABLEN_VALUE_512 9ULL +#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1) +#define MAX_IRQS_PER_TABLE_512 BIT(DTE_INTTABLEN_VALUE_512) +#define DTE_INTTABLEN_VALUE_2K 11ULL +#define DTE_INTTABLEN_2K (DTE_INTTABLEN_VALUE_2K << 1) +#define MAX_IRQS_PER_TABLE_2K BIT(DTE_INTTABLEN_VALUE_2K) #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 @@ -492,9 +497,6 @@ extern const struct iommu_ops amd_iommu_ops; /* IVRS indicates that pre-boot remapping was enabled */ extern bool amdr_ivrs_remap_support; -/* kmem_cache to get tables with 128 byte alignement */ -extern struct kmem_cache *amd_iommu_irq_cache; - #define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) #define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) #define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \ @@ -614,12 +616,6 @@ struct amd_iommu_pci_seg { /* Size of the device table */ u32 dev_table_size; - /* Size of the alias table */ - u32 alias_table_size; - - /* Size of the rlookup table */ - u32 rlookup_table_size; - /* * device table virtual address * @@ -851,6 +847,7 @@ struct iommu_dev_data { struct device *dev; u16 devid; /* PCI Device ID */ + unsigned int max_irqs; /* Maximum IRQs supported by device */ u32 max_pasids; /* Max supported PASIDs */ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ int ats_qdep; @@ -928,9 +925,6 @@ struct unity_map_entry { * Data structures for device handling */ -/* size of the dma_ops aperture as power of 2 */ -extern unsigned amd_iommu_aperture_order; - extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index cb536d372b12..9c17dfa76703 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include <linux/acpi.h> #include <linux/list.h> #include <linux/bitmap.h> -#include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/msi.h> @@ -219,7 +218,6 @@ static bool __initdata cmdline_maps; static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); -static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -245,17 +243,14 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline unsigned long tbl_size(int entry_size, int last_bdf) +int amd_iommu_get_num_iommus(void) { - unsigned shift = PAGE_SHIFT + - get_order((last_bdf + 1) * entry_size); - - return 1UL << shift; + return amd_iommus_present; } -int amd_iommu_get_num_iommus(void) +bool amd_iommu_ht_range_ignore(void) { - return amd_iommus_present; + return check_feature2(FEATURE_HT_RANGE_IGNORE); } /* @@ -412,33 +407,26 @@ static void iommu_set_device_table(struct amd_iommu *iommu) &entry, sizeof(entry)); } -/* Generic functions to enable/disable certain features of the IOMMU. */ -void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) { u64 ctrl; ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1ULL << bit); + mask <<= shift; + ctrl &= ~mask; + ctrl |= (val << shift) & mask; writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +/* Generic functions to enable/disable certain features of the IOMMU. */ +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1ULL << bit); - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 1ULL, 1ULL, bit); } -static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~CTRL_INV_TO_MASK; - ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 0ULL, 1ULL, bit); } /* Function to enable the hardware */ @@ -643,8 +631,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, + pci_seg->dev_table_size); if (!pci_seg->dev_table) return -ENOMEM; @@ -653,16 +641,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->rlookup_table), + GFP_KERNEL); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -671,17 +659,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->rlookup_table); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); - kmemleak_alloc(pci_seg->irq_lookup_table, - pci_seg->rlookup_table_size, 1, GFP_KERNEL); + pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->irq_lookup_table), + GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) return -ENOMEM; @@ -690,9 +676,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - kmemleak_free(pci_seg->irq_lookup_table); - iommu_free_pages(pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->irq_lookup_table); pci_seg->irq_lookup_table = NULL; } @@ -700,8 +684,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1, + sizeof(*pci_seg->alias_table), + GFP_KERNEL); if (!pci_seg->alias_table) return -ENOMEM; @@ -716,8 +701,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + kvfree(pci_seg->alias_table); pci_seg->alias_table = NULL; } @@ -728,8 +712,7 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); return iommu->cmd_buf ? 0 : -ENOMEM; } @@ -826,20 +809,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf); } void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size) { - int order = get_order(size); - void *buf = iommu_alloc_pages(gfp, order); + void *buf; - if (buf && - check_feature(FEATURE_SNP) && - set_memory_4k((unsigned long)buf, (1 << order))) { - iommu_free_pages(buf, order); - buf = NULL; + size = PAGE_ALIGN(size); + buf = iommu_alloc_pages_sz(gfp, size); + if (!buf) + return NULL; + if (check_feature(FEATURE_SNP) && + set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) { + iommu_free_pages(buf); + return NULL; } return buf; @@ -882,14 +867,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); + iommu_free_pages(iommu->evt_buf); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); - iommu_free_pages(iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log); + iommu_free_pages(iommu->ga_log_tail); #endif } @@ -934,11 +919,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8); if (!iommu->ga_log_tail) goto err_out; @@ -959,7 +944,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - iommu_free_page((void *)iommu->cmd_sem); + iommu_free_pages((void *)iommu->cmd_sem); } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -1033,8 +1018,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) if (!old_devtb) return false; - pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz( + GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size); if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); @@ -1069,7 +1054,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; if (irq_v && (int_ctl || int_tab_len)) { if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_INTTABLEN)) { + (int_tab_len != DTE_INTTABLEN_512 && + int_tab_len != DTE_INTTABLEN_2K)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); memunmap(old_devtb); return false; @@ -1607,9 +1593,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); - pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->dev_table_size = + max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), + SZ_4K); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -2038,9 +2024,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!iommu->dev) return -ENODEV; - /* Prevent binding other PCI device drivers to IOMMU devices */ - iommu->dev->match_driver = false; - /* ACPI _PRT won't have an IRQ for IOMMU */ iommu->dev->irq_managed = 1; @@ -2652,7 +2635,7 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); /* Set IOTLB invalidation timeout to 1s */ - iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); /* Enable Enhanced Peripheral Page Request Handling */ if (check_feature(FEATURE_EPHSUP)) @@ -2745,6 +2728,17 @@ static void iommu_enable_irtcachedis(struct amd_iommu *iommu) iommu->irtcachedis_enabled ? "disabled" : "enabled"); } +static void iommu_enable_2k_int(struct amd_iommu *iommu) +{ + if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + return; + + iommu_feature_set(iommu, + CONTROL_NUM_INT_REMAP_MODE_2K, + CONTROL_NUM_INT_REMAP_MODE_MASK, + CONTROL_NUM_INT_REMAP_MODE); +} + static void early_enable_iommu(struct amd_iommu *iommu) { iommu_disable(iommu); @@ -2757,6 +2751,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_enable(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2785,8 +2780,7 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - iommu_free_pages(pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->old_dev_tbl_cpy); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2799,8 +2793,7 @@ static void early_enable_iommus(void) pr_info("Copied DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } @@ -2813,6 +2806,7 @@ static void early_enable_iommus(void) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_set_device_table(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2939,9 +2933,6 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_iommu_resources(void) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - free_iommu_all(); free_pci_segments(); } @@ -3040,7 +3031,7 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int remap_cache_sz, ret; + int ret; acpi_status status; if (!amd_iommu_detected) @@ -3102,22 +3093,7 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_irq_remap) { struct amd_iommu_pci_seg *pci_seg; - /* - * Interrupt remapping enabled, create kmem_cache for the - * remapping tables. - */ ret = -ENOMEM; - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); - else - remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); - amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", - remap_cache_sz, - DTE_INTTAB_ALIGNMENT, - 0, NULL); - if (!amd_iommu_irq_cache) - goto out; - for_each_pci_segment(pci_seg) { if (alloc_irq_lookup_table(pci_seg)) goto out; @@ -3677,6 +3653,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index f3399087859f..4d308c071134 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -47,21 +47,7 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, return fpte; } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ - -static void free_pt_page(u64 *pt, struct list_head *freelist) -{ - struct page *p = virt_to_page(pt); - - list_add_tail(&p->lru, freelist); -} - -static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) +static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl) { u64 *p; int i; @@ -84,20 +70,20 @@ static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) if (lvl > 2) free_pt_lvl(p, freelist, lvl - 1); else - free_pt_page(p, freelist); + iommu_pages_list_add(freelist, p); } - free_pt_page(pt, freelist); + iommu_pages_list_add(freelist, pt); } -static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) +static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist) { switch (mode) { case PAGE_MODE_NONE: case PAGE_MODE_7_LEVEL: break; case PAGE_MODE_1_LEVEL: - free_pt_page(root, freelist); + iommu_pages_list_add(freelist, root); break; case PAGE_MODE_2_LEVEL: case PAGE_MODE_3_LEVEL: @@ -128,7 +114,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, bool ret = true; u64 *pte; - pte = iommu_alloc_page_node(cfg->amd.nid, gfp); + pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K); if (!pte) return false; @@ -153,7 +139,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, out: spin_unlock_irqrestore(&domain->lock, flags); - iommu_free_page(pte); + iommu_free_pages(pte); return ret; } @@ -220,7 +206,8 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = iommu_alloc_page_node(cfg->amd.nid, gfp); + page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, + SZ_4K); if (!page) return NULL; @@ -229,7 +216,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable, /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - iommu_free_page(page); + iommu_free_pages(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -306,7 +293,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable, return pte; } -static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) +static void free_clear_pte(u64 *pte, u64 pteval, + struct iommu_pages_list *freelist) { u64 *pt; int mode; @@ -335,7 +323,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, int prot, gfp_t gfp, size_t *mapped) { struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - LIST_HEAD(freelist); + struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); bool updated = false; u64 __pte, *pte; int ret, i, count; @@ -360,7 +348,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, for (i = 0; i < count; ++i) free_clear_pte(&pte[i], pte[i], &freelist); - if (!list_empty(&freelist)) + if (!iommu_pages_list_empty(&freelist)) updated = true; if (count > 1) { @@ -531,7 +519,7 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, static void v1_free_pgtable(struct io_pgtable *iop) { struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl); - LIST_HEAD(freelist); + struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); if (pgtable->mode == PAGE_MODE_NONE) return; @@ -548,7 +536,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->root = + iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->root) return NULL; pgtable->mode = PAGE_MODE_3_LEVEL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index c616de2c5926..b47941353ccb 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -121,10 +121,10 @@ static void free_pgtable(u64 *pt, int level) if (level > 2) free_pgtable(p, level - 1); else - iommu_free_page(p); + iommu_free_pages(p); } - iommu_free_page(pt); + iommu_free_pages(pt); } /* Allocate page table */ @@ -152,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = iommu_alloc_page_node(nid, gfp); + page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K); if (!page) return NULL; __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - iommu_free_page(page); + iommu_free_pages(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -181,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, if (pg_size == IOMMU_PAGE_SIZE_1G) free_pgtable(__pte, end_level - 1); else if (pg_size == IOMMU_PAGE_SIZE_2M) - iommu_free_page(__pte); + iommu_free_pages(__pte); } return pte; @@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd, iova, map_size, gfp, &updated); if (!pte) { - ret = -EINVAL; + ret = -ENOMEM; goto out; } @@ -346,7 +346,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cd5116d8c3b2..aea061f26de3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -75,8 +75,6 @@ struct iommu_cmd { */ DEFINE_IDA(pdom_ids); -struct kmem_cache *amd_iommu_irq_cache; - static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev); @@ -243,7 +241,9 @@ static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { struct acpi_device *adev = ACPI_COMPANION(dev); - struct acpihid_map_entry *p; + struct acpihid_map_entry *p, *p1 = NULL; + int hid_count = 0; + bool fw_bug; if (!adev) return -ENODEV; @@ -251,12 +251,33 @@ static inline int get_acpihid_device_id(struct device *dev, list_for_each_entry(p, &acpihid_map, list) { if (acpi_dev_hid_uid_match(adev, p->hid, p->uid[0] ? p->uid : NULL)) { - if (entry) - *entry = p; - return p->devid; + p1 = p; + fw_bug = false; + hid_count = 1; + break; + } + + /* + * Count HID matches w/o UID, raise FW_BUG but allow exactly one match + */ + if (acpi_dev_hid_match(adev, p->hid)) { + p1 = p; + hid_count++; + fw_bug = true; } } - return -EINVAL; + + if (!p1) + return -EINVAL; + if (fw_bug) + dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n", + hid_count, hid_count > 1 ? "s" : ""); + if (hid_count > 1) + return -EINVAL; + if (entry) + *entry = p1; + + return p1->devid; } static inline int get_device_sbdf_id(struct device *dev) @@ -613,8 +634,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } @@ -868,7 +889,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) int type, devid, flags, tag; volatile u32 *event = __evt; int count = 0; - u64 address; + u64 address, ctrl; u32 pasid; retry: @@ -878,6 +899,7 @@ retry: (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); if (type == 0) { /* Did we hit the erratum? */ @@ -899,6 +921,7 @@ retry: dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); + dev_err(dev, "Control Reg : 0x%llx\n", ctrl); dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: @@ -982,6 +1005,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) { iommu_ga_log_notifier = notifier; + /* + * Ensure all in-flight IRQ handlers run to completion before returning + * to the caller, e.g. to ensure module code isn't unloaded while it's + * being executed in the IRQ handler. + */ + if (!notifier) + synchronize_rcu(); + return 0; } EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); @@ -1812,7 +1843,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); - iommu_free_page(ptr); + iommu_free_pages(ptr); } } @@ -1845,7 +1876,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) /* Free per device domain ID */ pdom_id_free(gcr3_info->domid); - iommu_free_page(gcr3_info->gcr3_tbl); + iommu_free_pages(gcr3_info->gcr3_tbl); gcr3_info->gcr3_tbl = NULL; } @@ -1884,7 +1915,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, return -ENOSPC; gcr3_info->domid = domid; - gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); + gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K); if (gcr3_info->gcr3_tbl == NULL) { pdom_id_free(domid); return -ENOMEM; @@ -2394,8 +2425,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) } out_err: + iommu_completion_wait(iommu); + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K; + else + dev_data->max_irqs = MAX_IRQS_PER_TABLE_512; + if (dev_is_pci(dev)) pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT); @@ -2432,15 +2469,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -void protection_domain_free(struct protection_domain *domain) -{ - WARN_ON(!list_empty(&domain->dev_list)); - if (domain->domain.type & __IOMMU_DOMAIN_PAGING) - free_io_pgtable_ops(&domain->iop.pgtbl.ops); - pdom_id_free(domain->id); - kfree(domain); -} - static void protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); @@ -2498,8 +2526,21 @@ static inline u64 dma_max_address(enum protection_domain_mode pgtable) if (pgtable == PD_MODE_V1) return ~0ULL; - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) @@ -2578,7 +2619,11 @@ void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - protection_domain_free(domain); + WARN_ON(!list_empty(&domain->dev_list)); + if (domain->domain.type & __IOMMU_DOMAIN_PAGING) + free_io_pgtable_ops(&domain->iop.pgtbl.ops); + pdom_id_free(domain->id); + kfree(domain); } static int blocked_domain_attach_device(struct iommu_domain *domain, @@ -2907,6 +2952,9 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + if (amd_iommu_ht_range_ignore()) + return; + region = iommu_alloc_resv_region(HT_RANGE_START, HT_RANGE_END - HT_RANGE_START + 1, 0, IOMMU_RESV_RESERVED, GFP_KERNEL); @@ -2983,38 +3031,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; -static int amd_iommu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - -static int amd_iommu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .blocked_domain = &blocked_domain, @@ -3028,8 +3044,6 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .def_domain_type = amd_iommu_def_domain_type, - .dev_enable_feat = amd_iommu_dev_enable_feature, - .dev_disable_feat = amd_iommu_dev_disable_feature, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, @@ -3081,6 +3095,13 @@ out: raw_spin_unlock_irqrestore(&iommu->lock, flags); } +static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data) +{ + if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K) + return DTE_INTTABLEN_2K; + return DTE_INTTABLEN_512; +} + static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { @@ -3095,7 +3116,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, new &= ~DTE_IRQ_PHYS_ADDR_MASK; new |= iommu_virt_to_phys(table->table); new |= DTE_IRQ_REMAP_INTCTL; - new |= DTE_INTTABLEN; + new |= iommu_get_int_tablen(dev_data); new |= DTE_IRQ_REMAP_ENABLE; WRITE_ONCE(dte->data[2], new); @@ -3121,7 +3142,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) return table; } -static struct irq_remap_table *__alloc_irq_table(void) +static struct irq_remap_table *__alloc_irq_table(int nid, size_t size) { struct irq_remap_table *table; @@ -3129,19 +3150,14 @@ static struct irq_remap_table *__alloc_irq_table(void) if (!table) return NULL; - table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); + table->table = iommu_alloc_pages_node_sz( + nid, GFP_KERNEL, max(DTE_INTTAB_ALIGNMENT, size)); if (!table->table) { kfree(table); return NULL; } raw_spin_lock_init(&table->lock); - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - memset(table->table, 0, - MAX_IRQS_PER_TABLE * sizeof(u32)); - else - memset(table->table, 0, - (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); return table; } @@ -3173,13 +3189,23 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, return 0; } +static inline size_t get_irq_table_size(unsigned int max_irqs) +{ + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + return max_irqs * sizeof(u32); + + return max_irqs * (sizeof(u64) * 2); +} + static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, - u16 devid, struct pci_dev *pdev) + u16 devid, struct pci_dev *pdev, + unsigned int max_irqs) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; struct amd_iommu_pci_seg *pci_seg; unsigned long flags; + int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); @@ -3198,7 +3224,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, spin_unlock_irqrestore(&iommu_table_lock, flags); /* Nothing there yet, allocate new irq remapping table */ - new_table = __alloc_irq_table(); + new_table = __alloc_irq_table(nid, get_irq_table_size(max_irqs)); if (!new_table) return NULL; @@ -3233,20 +3259,21 @@ out_unlock: spin_unlock_irqrestore(&iommu_table_lock, flags); if (new_table) { - kmem_cache_free(amd_iommu_irq_cache, new_table->table); + iommu_free_pages(new_table->table); kfree(new_table); } return table; } static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, - bool align, struct pci_dev *pdev) + bool align, struct pci_dev *pdev, + unsigned long max_irqs) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - table = alloc_irq_table(iommu, devid, pdev); + table = alloc_irq_table(iommu, devid, pdev, max_irqs); if (!table) return -ENODEV; @@ -3257,7 +3284,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, /* Scan table for free entries */ for (index = ALIGN(table->min_index, alignment), c = 0; - index < MAX_IRQS_PER_TABLE;) { + index < max_irqs;) { if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; } else { @@ -3527,6 +3554,14 @@ static void fill_msi_msg(struct msi_msg *msg, u32 index) msg->data = index; msg->address_lo = 0; msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + /* + * The struct msi_msg.dest_mode_logical is used to set the DM bit + * in MSI Message Address Register. For device w/ 2K int-remap support, + * this is bit must be set to 1 regardless of the actual destination + * mode, which is signified by the IRTE[DM]. + */ + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + msg->arch_addr_lo.dest_mode_logical = true; msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; } @@ -3589,6 +3624,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct amd_ir_data *data = NULL; struct amd_iommu *iommu; struct irq_cfg *cfg; + struct iommu_dev_data *dev_data; + unsigned long max_irqs; int i, ret, devid, seg, sbdf; int index; @@ -3607,6 +3644,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (!iommu) return -EINVAL; + dev_data = search_dev_data(iommu, devid); + max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); if (ret < 0) return ret; @@ -3614,7 +3654,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - table = alloc_irq_table(iommu, devid, NULL); + table = alloc_irq_table(iommu, devid, NULL, max_irqs); if (table) { if (!table->min_index) { /* @@ -3635,9 +3675,11 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); index = alloc_irq_index(iommu, devid, nr_irqs, align, - msi_desc_to_pci_dev(info->desc)); + msi_desc_to_pci_dev(info->desc), + max_irqs); } else { - index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL, + max_irqs); } if (index < 0) { @@ -3840,6 +3882,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3850,21 +3895,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. - */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 11150cfd6718..77c8e9a91cbc 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -195,7 +195,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, ret = mmu_notifier_register(&pdom->mn, mm); if (ret) { - protection_domain_free(pdom); + amd_iommu_domain_free(&pdom->domain); return ERR_PTR(ret); } diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 7c67d69f0b8c..e6767c057d01 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -48,7 +48,7 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu) { - iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE)); + iommu_free_pages(iommu->ppr_log); } /* diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 95ba3caeb401..757d24f67ad4 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -36,7 +36,7 @@ #define DART_MAX_STREAMS 256 #define DART_MAX_TTBR 4 -#define MAX_DARTS_PER_DEVICE 2 +#define MAX_DARTS_PER_DEVICE 3 /* Common registers */ @@ -277,6 +277,9 @@ struct apple_dart_domain { * @streams: streams for this device */ struct apple_dart_master_cfg { + /* Intersection of DART capabilitles */ + u32 supports_bypass : 1; + struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE]; }; @@ -684,7 +687,7 @@ static int apple_dart_attach_dev_identity(struct iommu_domain *domain, struct apple_dart_stream_map *stream_map; int i; - if (!cfg->stream_maps[0].dart->supports_bypass) + if (!cfg->supports_bypass) return -EINVAL; for_each_stream_map(i, cfg, stream_map) @@ -773,8 +776,7 @@ static void apple_dart_domain_free(struct iommu_domain *domain) { struct apple_dart_domain *dart_domain = to_dart_domain(domain); - if (dart_domain->pgtbl_ops) - free_io_pgtable_ops(dart_domain->pgtbl_ops); + free_io_pgtable_ops(dart_domain->pgtbl_ops); kfree(dart_domain); } @@ -792,20 +794,23 @@ static int apple_dart_of_xlate(struct device *dev, return -EINVAL; sid = args->args[0]; - if (!cfg) + if (!cfg) { cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return -ENOMEM; + if (!cfg) + return -ENOMEM; + /* Will be ANDed with DART capabilities */ + cfg->supports_bypass = true; + } dev_iommu_priv_set(dev, cfg); cfg_dart = cfg->stream_maps[0].dart; if (cfg_dart) { - if (cfg_dart->supports_bypass != dart->supports_bypass) - return -EINVAL; if (cfg_dart->pgsize != dart->pgsize) return -EINVAL; } + cfg->supports_bypass &= dart->supports_bypass; + for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (cfg->stream_maps[i].dart == dart) { set_bit(sid, cfg->stream_maps[i].sidmap); @@ -945,7 +950,7 @@ static int apple_dart_def_domain_type(struct device *dev) if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE) return IOMMU_DOMAIN_IDENTITY; - if (!cfg->stream_maps[0].dart->supports_bypass) + if (!cfg->supports_bypass) return IOMMU_DOMAIN_DMA; return 0; diff --git a/drivers/iommu/arm/Kconfig b/drivers/iommu/arm/Kconfig new file mode 100644 index 000000000000..ef42bbe07dbe --- /dev/null +++ b/drivers/iommu/arm/Kconfig @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-2.0-only +# ARM IOMMU support +config ARM_SMMU + tristate "ARM Ltd. System MMU (SMMU) Support" + depends on ARM64 || ARM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU if ARM + help + Support for implementations of the ARM System MMU architecture + versions 1 and 2. + + Say Y here if your SoC includes an IOMMU device implementing + the ARM SMMU architecture. + +if ARM_SMMU +config ARM_SMMU_LEGACY_DT_BINDINGS + bool "Support the legacy \"mmu-masters\" devicetree bindings" + depends on ARM_SMMU=y && OF + help + Support for the badly designed and deprecated "mmu-masters" + devicetree bindings. This allows some DMA masters to attach + to the SMMU but does not provide any support via the DMA API. + If you're lucky, you might be able to get VFIO up and running. + + If you say Y here then you'll make me very sad. Instead, say N + and move your firmware to the utopian future that was 2016. + +config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT + bool "Disable unmatched stream bypass by default" if EXPERT + default y + help + If your firmware is broken and fails to describe StreamIDs which + Linux should know about in order to manage the SMMU correctly and + securely, and you don't want to boot with the 'arm-smmu.disable_bypass=0' + command line parameter, then as a last resort you can turn it off + by default here. But don't. This option may be removed at any time. + + Note that 'arm-smmu.disable_bypass=1' will still take precedence. + +config ARM_SMMU_MMU_500_CPRE_ERRATA + bool "Enable errata workaround for CPRE in SMMU reset path" + default y + help + Say Y here (by default) to apply workaround to disable + MMU-500's next-page prefetcher for sake of 4 known errata. + + Say N here only when it is sure that any errata related to + prefetch enablement are not applicable on the platform. + Refer silicon-errata.rst for info on errata IDs. + +config ARM_SMMU_QCOM + def_tristate y + depends on ARCH_QCOM + select QCOM_SCM + help + When running on a Qualcomm platform that has the custom variant + of the ARM SMMU, this needs to be built into the SMMU driver. + +config ARM_SMMU_QCOM_DEBUG + bool "ARM SMMU QCOM implementation defined debug support" + depends on ARM_SMMU_QCOM=y + help + Support for implementation specific debug features in ARM SMMU + hardware found in QTI platforms. This include support for + the Translation Buffer Units (TBU) that can be used to obtain + additional information when debugging memory management issues + like context faults. + + Say Y here to enable debug for issues such as context faults + or TLB sync timeouts which requires implementation defined + register dumps. +endif + +config ARM_SMMU_V3 + tristate "ARM Ltd. System MMU Version 3 (SMMUv3) Support" + depends on ARM64 + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD + help + Support for implementations of the ARM System MMU architecture + version 3 providing translation support to a PCIe root complex. + + Say Y here if your system includes an IOMMU device implementing + the ARM SMMUv3 architecture. + +if ARM_SMMU_V3 +config ARM_SMMU_V3_SVA + bool "Shared Virtual Addressing support for the ARM SMMUv3" + select IOMMU_SVA + select IOMMU_IOPF + select MMU_NOTIFIER + help + Support for sharing process address spaces with devices using the + SMMUv3. + + Say Y here if your system supports SVA extensions such as PCIe PASID + and PRI. + +config ARM_SMMU_V3_IOMMUFD + bool "Enable IOMMUFD features for ARM SMMUv3 (EXPERIMENTAL)" + depends on IOMMUFD + help + Support for IOMMUFD features intended to support virtual machines + with accelerated virtual IOMMUs. + + Say Y here if you are doing development and testing on this feature. + +config ARM_SMMU_V3_KUNIT_TEST + tristate "KUnit tests for arm-smmu-v3 driver" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on ARM_SMMU_V3_SVA + default KUNIT_ALL_TESTS + help + Enable this option to unit-test arm-smmu-v3 driver functions. + + If unsure, say N. + +config TEGRA241_CMDQV + bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" + depends on ACPI + help + Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The + CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues + support, except with virtualization capabilities. + + Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same + CMDQ-V extension. +endif + +config QCOM_IOMMU + # Note: iommu drivers cannot (yet?) be built as modules + bool "Qualcomm IOMMU Support" + depends on ARCH_QCOM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + select QCOM_SCM + select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE + select ARM_DMA_USE_IOMMU + help + Support for IOMMU on certain Qualcomm SoCs. diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index 5aa2e7af58b4..e4fd8d522af8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -43,6 +43,8 @@ static void arm_smmu_make_nested_cd_table_ste( target->data[0] |= nested_domain->ste[0] & ~cpu_to_le64(STRTAB_STE_0_CFG); target->data[1] |= nested_domain->ste[1]; + /* Merge events for DoS mitigations on eventq */ + target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV); } /* @@ -85,6 +87,47 @@ static void arm_smmu_make_nested_domain_ste( } } +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + struct arm_smmu_vmaster *vmaster; + unsigned long vsid; + int ret; + + iommu_group_mutex_assert(state->master->dev); + + ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core, + state->master->dev, &vsid); + if (ret) + return ret; + + vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); + if (!vmaster) + return -ENOMEM; + vmaster->vsmmu = nested_domain->vsmmu; + vmaster->vsid = vsid; + state->vmaster = vmaster; + + return 0; +} + +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + mutex_lock(&master->smmu->streams_mutex); + kfree(master->vmaster); + master->vmaster = state->vmaster; + mutex_unlock(&master->smmu->streams_mutex); +} + +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ + struct arm_smmu_attach_state state = { .master = master }; + + arm_smmu_attach_commit_vmaster(&state); +} + static int arm_smmu_attach_dev_nested(struct iommu_domain *domain, struct device *dev) { @@ -392,4 +435,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, return &vsmmu->core; } +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt) +{ + struct iommu_vevent_arm_smmuv3 vevt; + int i; + + lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex); + + vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) | + FIELD_PREP(EVTQ_0_SID, vmaster->vsid)); + for (i = 1; i < EVTQ_ENT_DWORDS; i++) + vevt.evt[i] = cpu_to_le64(evt[i]); + + return iommufd_viommu_report_event(&vmaster->vsmmu->core, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt, + sizeof(vevt)); +} + MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7c..0601dece0a0d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,8 +13,6 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -static DEFINE_MUTEX(sva_lock); - static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { @@ -257,84 +255,6 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return true; } -bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) -{ - /* We're not keeping track of SIDs in fault events */ - if (master->num_streams != 1) - return false; - - return master->stall_enabled; -} - -bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) -{ - if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) - return false; - - /* SSID support is mandatory for the moment */ - return master->ssid_bits; -} - -bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) -{ - bool enabled; - - mutex_lock(&sva_lock); - enabled = master->sva_enabled; - mutex_unlock(&sva_lock); - return enabled; -} - -static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master) -{ - struct device *dev = master->dev; - - /* - * Drivers for devices supporting PRI or stall should enable IOPF first. - * Others have device-specific fault handlers and don't need IOPF. - */ - if (!arm_smmu_master_iopf_supported(master)) - return 0; - - if (!master->iopf_enabled) - return -EINVAL; - - return iopf_queue_add_device(master->smmu->evtq.iopf, dev); -} - -static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master) -{ - struct device *dev = master->dev; - - if (!master->iopf_enabled) - return; - - iopf_queue_remove_device(master->smmu->evtq.iopf, dev); -} - -int arm_smmu_master_enable_sva(struct arm_smmu_master *master) -{ - int ret; - - mutex_lock(&sva_lock); - ret = arm_smmu_master_sva_enable_iopf(master); - if (!ret) - master->sva_enabled = true; - mutex_unlock(&sva_lock); - - return ret; -} - -int arm_smmu_master_disable_sva(struct arm_smmu_master *master) -{ - mutex_lock(&sva_lock); - arm_smmu_master_sva_disable_iopf(master); - master->sva_enabled = false; - mutex_unlock(&sva_lock); - - return 0; -} - void arm_smmu_sva_notifier_synchronize(void) { /* @@ -353,6 +273,9 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct arm_smmu_cd target; int ret; + if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) + return -EOPNOTSUPP; + /* Prevent arm_smmu_mm_release from being called while we are attaching */ if (!mmget_not_zero(domain->mm)) return -EINVAL; @@ -406,11 +329,20 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, u32 asid; int ret; + if (!(master->smmu->features & ARM_SMMU_FEAT_SVA)) + return ERR_PTR(-EOPNOTSUPP); + smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 358072b4e293..dacaa78f69aa 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1052,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | - STRTAB_STE_1_EATS); + STRTAB_STE_1_EATS | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); /* @@ -1068,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) if (cfg & BIT(1)) { used_bits[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS | - STRTAB_STE_1_SHCFG); + STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | @@ -1813,8 +1813,8 @@ static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw, mutex_unlock(&smmu->streams_mutex); } -static int arm_smmu_handle_event(struct arm_smmu_device *smmu, - struct arm_smmu_event *event) +static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt, + struct arm_smmu_event *event) { int ret = 0; u32 perm = 0; @@ -1823,6 +1823,10 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, struct iommu_fault *flt = &fault_evt.fault; switch (event->id) { + case EVT_ID_BAD_STE_CONFIG: + case EVT_ID_STREAM_DISABLED_FAULT: + case EVT_ID_BAD_SUBSTREAMID_CONFIG: + case EVT_ID_BAD_CD_CONFIG: case EVT_ID_TRANSLATION_FAULT: case EVT_ID_ADDR_SIZE_FAULT: case EVT_ID_ACCESS_FAULT: @@ -1832,31 +1836,30 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, return -EOPNOTSUPP; } - if (!event->stall) - return -EOPNOTSUPP; - - if (event->read) - perm |= IOMMU_FAULT_PERM_READ; - else - perm |= IOMMU_FAULT_PERM_WRITE; + if (event->stall) { + if (event->read) + perm |= IOMMU_FAULT_PERM_READ; + else + perm |= IOMMU_FAULT_PERM_WRITE; - if (event->instruction) - perm |= IOMMU_FAULT_PERM_EXEC; + if (event->instruction) + perm |= IOMMU_FAULT_PERM_EXEC; - if (event->privileged) - perm |= IOMMU_FAULT_PERM_PRIV; + if (event->privileged) + perm |= IOMMU_FAULT_PERM_PRIV; - flt->type = IOMMU_FAULT_PAGE_REQ; - flt->prm = (struct iommu_fault_page_request) { - .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, - .grpid = event->stag, - .perm = perm, - .addr = event->iova, - }; + flt->type = IOMMU_FAULT_PAGE_REQ; + flt->prm = (struct iommu_fault_page_request){ + .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE, + .grpid = event->stag, + .perm = perm, + .addr = event->iova, + }; - if (event->ssv) { - flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; - flt->prm.pasid = event->ssid; + if (event->ssv) { + flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + flt->prm.pasid = event->ssid; + } } mutex_lock(&smmu->streams_mutex); @@ -1866,7 +1869,12 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu, goto out_unlock; } - ret = iommu_report_device_fault(master->dev, &fault_evt); + if (event->stall) + ret = iommu_report_device_fault(master->dev, &fault_evt); + else if (master->vmaster && !event->s2) + ret = arm_vmaster_report_event(master->vmaster, evt); + else + ret = -EOPNOTSUPP; /* Unhandled events should be pinned */ out_unlock: mutex_unlock(&smmu->streams_mutex); return ret; @@ -1944,7 +1952,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) do { while (!queue_remove_raw(q, evt)) { arm_smmu_decode_event(smmu, evt, &event); - if (arm_smmu_handle_event(smmu, &event)) + if (arm_smmu_handle_event(smmu, evt, &event)) arm_smmu_dump_event(smmu, evt, &event, &rs); put_device(event.dev); @@ -2712,6 +2720,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, + struct iommu_domain *domain, struct arm_smmu_master *master, ioasid_t ssid, bool nested_ats_flush) { @@ -2722,6 +2731,7 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { if (master_domain->master == master && + master_domain->domain == domain && master_domain->ssid == ssid && master_domain->nested_ats_flush == nested_ats_flush) return master_domain; @@ -2748,6 +2758,58 @@ to_smmu_domain_devices(struct iommu_domain *domain) return NULL; } +static int arm_smmu_enable_iopf(struct arm_smmu_master *master, + struct arm_smmu_master_domain *master_domain) +{ + int ret; + + iommu_group_mutex_assert(master->dev); + + if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA)) + return -EOPNOTSUPP; + + /* + * Drivers for devices supporting PRI or stall require iopf others have + * device-specific fault handlers and don't need IOPF, so this is not a + * failure. + */ + if (!master->stall_enabled) + return 0; + + /* We're not keeping track of SIDs in fault events */ + if (master->num_streams != 1) + return -EOPNOTSUPP; + + if (master->iopf_refcount) { + master->iopf_refcount++; + master_domain->using_iopf = true; + return 0; + } + + ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev); + if (ret) + return ret; + master->iopf_refcount = 1; + master_domain->using_iopf = true; + return 0; +} + +static void arm_smmu_disable_iopf(struct arm_smmu_master *master, + struct arm_smmu_master_domain *master_domain) +{ + iommu_group_mutex_assert(master->dev); + + if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA)) + return; + + if (!master_domain || !master_domain->using_iopf) + return; + + master->iopf_refcount--; + if (master->iopf_refcount == 0) + iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev); +} + static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, struct iommu_domain *domain, ioasid_t ssid) @@ -2764,15 +2826,17 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid, - nested_ats_flush); + master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master, + ssid, nested_ats_flush); if (master_domain) { list_del(&master_domain->devices_elm); - kfree(master_domain); if (master->ats_enabled) atomic_dec(&smmu_domain->nr_ats_masters); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_disable_iopf(master, master_domain); + kfree(master_domain); } /* @@ -2803,6 +2867,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(new_domain); unsigned long flags; + int ret; /* * arm_smmu_share_asid() must not see two domains pointing to the same @@ -2832,15 +2897,31 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, } if (smmu_domain) { + if (new_domain->type == IOMMU_DOMAIN_NESTED) { + ret = arm_smmu_attach_prepare_vmaster( + state, to_smmu_nested_domain(new_domain)); + if (ret) + return ret; + } + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); - if (!master_domain) - return -ENOMEM; + if (!master_domain) { + ret = -ENOMEM; + goto err_free_vmaster; + } + master_domain->domain = new_domain; master_domain->master = master; master_domain->ssid = state->ssid; if (new_domain->type == IOMMU_DOMAIN_NESTED) master_domain->nested_ats_flush = to_smmu_nested_domain(new_domain)->enable_ats; + if (new_domain->iopf_handler) { + ret = arm_smmu_enable_iopf(master, master_domain); + if (ret) + goto err_free_master_domain; + } + /* * During prepare we want the current smmu_domain and new * smmu_domain to be in the devices list before we change any @@ -2860,8 +2941,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, !arm_smmu_master_canwbs(master)) { spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - kfree(master_domain); - return -EINVAL; + ret = -EINVAL; + goto err_iopf; } if (state->ats_enabled) @@ -2880,6 +2961,14 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, wmb(); } return 0; + +err_iopf: + arm_smmu_disable_iopf(master, master_domain); +err_free_master_domain: + kfree(master_domain); +err_free_vmaster: + kfree(state->vmaster); + return ret; } /* @@ -2893,6 +2982,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) lockdep_assert_held(&arm_smmu_asid_lock); + arm_smmu_attach_commit_vmaster(state); + if (state->ats_enabled && !master->ats_enabled) { arm_smmu_enable_ats(master); } else if (state->ats_enabled && master->ats_enabled) { @@ -2932,7 +3023,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu = master->smmu; if (smmu_domain->smmu != smmu) - return ret; + return -EINVAL; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); @@ -3162,6 +3253,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_ste ste; struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_bypass_ste(master->smmu, &ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); return 0; @@ -3180,7 +3272,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_ste ste; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_master_clear_vmaster(master); arm_smmu_make_abort_ste(&ste); arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_TERMINATE); @@ -3364,6 +3458,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3374,11 +3469,21 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); - ret = -EINVAL; + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } @@ -3475,8 +3580,7 @@ static void arm_smmu_release_device(struct device *dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); - if (WARN_ON(arm_smmu_master_sva_enabled(master))) - iopf_queue_remove_device(master->smmu->evtq.iopf, dev); + WARN_ON(master->iopf_refcount); /* Put the STE back to what arm_smmu_init_strtab() sets */ if (dev->iommu->require_direct) @@ -3551,58 +3655,6 @@ static void arm_smmu_get_resv_regions(struct device *dev, iommu_dma_get_resv_regions(dev, head); } -static int arm_smmu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return -ENODEV; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - if (!arm_smmu_master_iopf_supported(master)) - return -EINVAL; - if (master->iopf_enabled) - return -EBUSY; - master->iopf_enabled = true; - return 0; - case IOMMU_DEV_FEAT_SVA: - if (!arm_smmu_master_sva_supported(master)) - return -EINVAL; - if (arm_smmu_master_sva_enabled(master)) - return -EBUSY; - return arm_smmu_master_enable_sva(master); - default: - return -EINVAL; - } -} - -static int arm_smmu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - if (!master) - return -EINVAL; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - if (!master->iopf_enabled) - return -EINVAL; - if (master->sva_enabled) - return -EBUSY; - master->iopf_enabled = false; - return 0; - case IOMMU_DEV_FEAT_SVA: - if (!arm_smmu_master_sva_enabled(master)) - return -EINVAL; - return arm_smmu_master_disable_sva(master); - default: - return -EINVAL; - } -} - /* * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the * PCIe link and save the data to memory by DMA. The hardware is restricted to @@ -3635,8 +3687,6 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, - .dev_enable_feat = arm_smmu_dev_enable_feature, - .dev_disable_feat = arm_smmu_dev_disable_feature, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .viommu_alloc = arm_vsmmu_alloc, @@ -4405,6 +4455,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index bd9d7c85576a..ea41d790463e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -266,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4) #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6) +#define STRTAB_STE_1_MEV (1UL << 19) #define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) @@ -799,6 +800,11 @@ struct arm_smmu_stream { struct rb_node node; }; +struct arm_smmu_vmaster { + struct arm_vsmmu *vsmmu; + unsigned long vsid; +}; + struct arm_smmu_event { u8 stall : 1, ssv : 1, @@ -824,15 +830,15 @@ struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; struct arm_smmu_stream *streams; + struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */ /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; bool ats_enabled : 1; bool ste_ats_enabled : 1; bool stall_enabled; - bool sva_enabled; - bool iopf_enabled; unsigned int ssid_bits; + unsigned int iopf_refcount; }; /* SMMU private data for an IOMMU domain */ @@ -908,8 +914,14 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; + /* + * For nested domains the master_domain is threaded onto the S2 parent, + * this points to the IOMMU_DOMAIN_NESTED to disambiguate the masters. + */ + struct iommu_domain *domain; ioasid_t ssid; bool nested_ats_flush : 1; + bool using_iopf : 1; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -972,6 +984,7 @@ struct arm_smmu_attach_state { bool disable_ats; ioasid_t ssid; /* Resulting state */ + struct arm_smmu_vmaster *vmaster; bool ats_enabled; }; @@ -987,11 +1000,6 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); -bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); -bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); -int arm_smmu_master_enable_sva(struct arm_smmu_master *master); -int arm_smmu_master_disable_sva(struct arm_smmu_master *master); -bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm); @@ -1001,31 +1009,6 @@ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) return false; } -static inline bool arm_smmu_master_sva_supported(struct arm_smmu_master *master) -{ - return false; -} - -static inline bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master) -{ - return false; -} - -static inline int arm_smmu_master_enable_sva(struct arm_smmu_master *master) -{ - return -ENODEV; -} - -static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master) -{ - return -ENODEV; -} - -static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master) -{ - return false; -} - static inline void arm_smmu_sva_notifier_synchronize(void) {} #define arm_smmu_sva_domain_alloc NULL @@ -1055,9 +1038,37 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, struct iommu_domain *parent, struct iommufd_ctx *ictx, unsigned int viommu_type); +int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain); +void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); +void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); +int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt); #else #define arm_smmu_hw_info NULL #define arm_vsmmu_alloc NULL + +static inline int +arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, + struct arm_smmu_nested_domain *nested_domain) +{ + return 0; +} + +static inline void +arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state) +{ +} + +static inline void +arm_smmu_master_clear_vmaster(struct arm_smmu_master *master) +{ +} + +static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, + u64 *evt) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index d03b2239baad..65e0ef6539fe 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -406,6 +406,12 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) arm_smmu_print_context_fault_info(smmu, idx, &cfi); arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); + + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) { + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, + ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE); + } + return IRQ_HANDLED; } @@ -416,6 +422,9 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) if (!tmp || tmp == -EBUSY) { ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; + } else if (tmp == -EAGAIN) { + ret = IRQ_HANDLED; + resume = 0; } else { phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 59d02687280e..57c097e87613 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -112,25 +112,39 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) { struct arm_smmu_domain *smmu_domain = (void *)cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + u32 mask = BIT(cfg->cbndx); + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; + unsigned long flags; if (enabled) - qsmmu->stall_enabled |= BIT(cfg->cbndx); + qsmmu->stall_enabled |= mask; else - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); -} + qsmmu->stall_enabled &= ~mask; -static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate) -{ - struct arm_smmu_domain *smmu_domain = (void *)cookie; - struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; - u32 reg = 0; + /* + * If the device is on and we changed the setting, update the register. + * The spec pseudocode says that CFCFG is resampled after a fault, and + * we believe that no implementations cache it in the TLB, so it should + * be safe to change it without a TLB invalidation. + */ + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { + u32 reg; + + spin_lock_irqsave(&smmu_domain->cb_lock, flags); + reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); + + if (enabled) + reg |= ARM_SMMU_SCTLR_CFCFG; + else + reg &= ~ARM_SMMU_SCTLR_CFCFG; - if (terminate) - reg |= ARM_SMMU_RESUME_TERMINATE; + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); - arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg); + pm_runtime_put_autosuspend(smmu->dev); + } } static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set) @@ -337,12 +351,12 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, priv->set_ttbr0_cfg = qcom_adreno_smmu_set_ttbr0_cfg; priv->get_fault_info = qcom_adreno_smmu_get_fault_info; priv->set_stall = qcom_adreno_smmu_set_stall; - priv->resume_translation = qcom_adreno_smmu_resume_translation; priv->set_prr_bit = NULL; priv->set_prr_addr = NULL; if (of_device_is_compatible(np, "qcom,smmu-500") && - of_device_is_compatible(np, "qcom,adreno-smmu")) { + !of_device_is_compatible(np, "qcom,sm8250-smmu-500") && + of_device_is_compatible(np, "qcom,adreno-smmu")) { priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit; priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr; } @@ -356,6 +370,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,mdp4" }, { .compatible = "qcom,mdss" }, { .compatible = "qcom,qcm2290-mdss" }, + { .compatible = "qcom,sar2130p-mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, { .compatible = "qcom,sc7280-mdss" }, @@ -365,6 +380,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6115-mdss" }, { .compatible = "qcom,sm6350-mdss" }, { .compatible = "qcom,sm6375-mdss" }, { .compatible = "qcom,sm8150-mdss" }, @@ -585,6 +601,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = { .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, + .context_fault_needs_threaded_irq = true, }; static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { @@ -594,6 +611,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = { .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank, .write_sctlr = qcom_adreno_smmu_write_sctlr, .tlb_sync = qcom_smmu_tlb_sync, + .context_fault_needs_threaded_irq = true, }; static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index de205a34ffc6..8d95b14c7d5a 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -79,8 +79,11 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) { - if (pm_runtime_enabled(smmu->dev)) - pm_runtime_put_autosuspend(smmu->dev); + if (pm_runtime_enabled(smmu->dev)) { + pm_runtime_mark_last_busy(smmu->dev); + __pm_runtime_put_autosuspend(smmu->dev); + + } } static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) @@ -471,6 +474,12 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) arm_smmu_print_context_fault_info(smmu, idx, &cfi); arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); + + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) { + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, + ret == -EAGAIN ? 0 : ARM_SMMU_RESUME_TERMINATE); + } + return IRQ_HANDLED; } @@ -1195,7 +1204,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Looks ok, so add the device to the domain */ arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, smmu_domain->cfg.cbndx, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); rpm_put: arm_smmu_rpm_put(smmu); return ret; @@ -1218,7 +1226,6 @@ static int arm_smmu_attach_dev_type(struct device *dev, return ret; arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); arm_smmu_rpm_put(smmu); return 0; } @@ -1486,7 +1493,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) out_cfg_free: kfree(cfg); out_free: - iommu_fwspec_free(dev); return ERR_PTR(ret); } @@ -2246,6 +2252,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (dev->pm_domain) { pm_runtime_set_active(dev); pm_runtime_enable(dev); + arm_smmu_rpm_use_autosuspend(smmu); } return 0; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2a9fa0c8cc00..ea2ef53bd4fe 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -24,8 +24,10 @@ #include <linux/memremap.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/msi.h> #include <linux/of_iommu.h> #include <linux/pci.h> +#include <linux/pci-p2pdma.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> #include <linux/swiotlb.h> @@ -41,11 +43,6 @@ struct iommu_dma_msi_page { phys_addr_t phys; }; -enum iommu_dma_cookie_type { - IOMMU_DMA_IOVA_COOKIE, - IOMMU_DMA_MSI_COOKIE, -}; - enum iommu_dma_queue_type { IOMMU_DMA_OPTS_PER_CPU_QUEUE, IOMMU_DMA_OPTS_SINGLE_QUEUE, @@ -58,35 +55,30 @@ struct iommu_dma_options { }; struct iommu_dma_cookie { - enum iommu_dma_cookie_type type; + struct iova_domain iovad; + struct list_head msi_page_list; + /* Flush queue */ union { - /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ - struct { - struct iova_domain iovad; - /* Flush queue */ - union { - struct iova_fq *single_fq; - struct iova_fq __percpu *percpu_fq; - }; - /* Number of TLB flushes that have been started */ - atomic64_t fq_flush_start_cnt; - /* Number of TLB flushes that have been finished */ - atomic64_t fq_flush_finish_cnt; - /* Timer to regularily empty the flush queues */ - struct timer_list fq_timer; - /* 1 when timer is active, 0 when not */ - atomic_t fq_timer_on; - }; - /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ - dma_addr_t msi_iova; + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; }; - struct list_head msi_page_list; - + /* Number of TLB flushes that have been started */ + atomic64_t fq_flush_start_cnt; + /* Number of TLB flushes that have been finished */ + atomic64_t fq_flush_finish_cnt; + /* Timer to regularily empty the flush queues */ + struct timer_list fq_timer; + /* 1 when timer is active, 0 when not */ + atomic_t fq_timer_on; /* Domain for flush queue callback; NULL if flush queue not in use */ - struct iommu_domain *fq_domain; + struct iommu_domain *fq_domain; /* Options for dma-iommu use */ - struct iommu_dma_options options; - struct mutex mutex; + struct iommu_dma_options options; +}; + +struct iommu_dma_msi_cookie { + dma_addr_t msi_iova; + struct list_head msi_page_list; }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); @@ -114,7 +106,7 @@ early_param("iommu.forcedac", iommu_dma_forcedac_setup); struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; - struct list_head freelist; + struct iommu_pages_list freelist; u64 counter; /* Flush counter when this entry was added */ }; @@ -163,6 +155,8 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq fq->entries[idx].iova_pfn, fq->entries[idx].pages); + fq->entries[idx].freelist = + IOMMU_PAGES_LIST_INIT(fq->entries[idx].freelist); fq->head = (fq->head + 1) & fq->mod_mask; } } @@ -185,7 +179,8 @@ static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) static void fq_flush_timeout(struct timer_list *t) { - struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer); + struct iommu_dma_cookie *cookie = timer_container_of(cookie, t, + fq_timer); int cpu; atomic_set(&cookie->fq_timer_on, 0); @@ -201,7 +196,7 @@ static void fq_flush_timeout(struct timer_list *t) static void queue_iova(struct iommu_dma_cookie *cookie, unsigned long pfn, unsigned long pages, - struct list_head *freelist) + struct iommu_pages_list *freelist) { struct iova_fq *fq; unsigned long flags; @@ -240,7 +235,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie, fq->entries[idx].iova_pfn = pfn; fq->entries[idx].pages = pages; fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); - list_splice(freelist, &fq->entries[idx].freelist); + iommu_pages_list_splice(freelist, &fq->entries[idx].freelist); spin_unlock_irqrestore(&fq->lock, flags); @@ -280,7 +275,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) if (!cookie->fq_domain) return; - del_timer_sync(&cookie->fq_timer); + timer_delete_sync(&cookie->fq_timer); if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) iommu_dma_free_fq_single(cookie->single_fq); else @@ -298,7 +293,8 @@ static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) spin_lock_init(&fq->lock); for (i = 0; i < fq_size; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); + fq->entries[i].freelist = + IOMMU_PAGES_LIST_INIT(fq->entries[i].freelist); } static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) @@ -365,39 +361,24 @@ int iommu_dma_init_fq(struct iommu_domain *domain) return 0; } -static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) -{ - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) - return cookie->iovad.granule; - return PAGE_SIZE; -} - -static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) -{ - struct iommu_dma_cookie *cookie; - - cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); - if (cookie) { - INIT_LIST_HEAD(&cookie->msi_page_list); - cookie->type = type; - } - return cookie; -} - /** * iommu_get_dma_cookie - Acquire DMA-API resources for a domain * @domain: IOMMU domain to prepare for DMA-API usage */ int iommu_get_dma_cookie(struct iommu_domain *domain) { - if (domain->iova_cookie) + struct iommu_dma_cookie *cookie; + + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); - if (!domain->iova_cookie) + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); + if (!cookie) return -ENOMEM; - mutex_init(&domain->iova_cookie->mutex); + INIT_LIST_HEAD(&cookie->msi_page_list); + domain->cookie_type = IOMMU_COOKIE_DMA_IOVA; + domain->iova_cookie = cookie; return 0; } @@ -415,48 +396,56 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) */ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { - struct iommu_dma_cookie *cookie; + struct iommu_dma_msi_cookie *cookie; if (domain->type != IOMMU_DOMAIN_UNMANAGED) return -EINVAL; - if (domain->iova_cookie) + if (domain->cookie_type != IOMMU_COOKIE_NONE) return -EEXIST; - cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); + cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); if (!cookie) return -ENOMEM; cookie->msi_iova = base; - domain->iova_cookie = cookie; + INIT_LIST_HEAD(&cookie->msi_page_list); + domain->cookie_type = IOMMU_COOKIE_DMA_MSI; + domain->msi_cookie = cookie; return 0; } EXPORT_SYMBOL(iommu_get_msi_cookie); /** * iommu_put_dma_cookie - Release a domain's DMA mapping resources - * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or - * iommu_get_msi_cookie() + * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() */ void iommu_put_dma_cookie(struct iommu_domain *domain) { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi, *tmp; - if (!cookie) - return; - - if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { + if (cookie->iovad.granule) { iommu_dma_free_fq(cookie); put_iova_domain(&cookie->iovad); } + list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) + kfree(msi); + kfree(cookie); +} + +/** + * iommu_put_msi_cookie - Release a domain's MSI mapping resources + * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie() + */ +void iommu_put_msi_cookie(struct iommu_domain *domain) +{ + struct iommu_dma_msi_cookie *cookie = domain->msi_cookie; + struct iommu_dma_msi_page *msi, *tmp; - list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { - list_del(&msi->list); + list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) kfree(msi); - } kfree(cookie); - domain->iova_cookie = NULL; } /** @@ -676,7 +665,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev struct iova_domain *iovad; int ret; - if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) + if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA) return -EINVAL; iovad = &cookie->iovad; @@ -698,23 +687,20 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev domain->geometry.aperture_start >> order); /* start_pfn is always nonzero for an already-initialised domain */ - mutex_lock(&cookie->mutex); if (iovad->start_pfn) { if (1UL << order != iovad->granule || base_pfn != iovad->start_pfn) { pr_warn("Incompatible range for DMA domain\n"); - ret = -EFAULT; - goto done_unlock; + return -EFAULT; } - ret = 0; - goto done_unlock; + return 0; } init_iova_domain(iovad, 1UL << order, base_pfn); ret = iova_domain_init_rcaches(iovad); if (ret) - goto done_unlock; + return ret; iommu_dma_init_options(&cookie->options, dev); @@ -723,11 +709,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) domain->type = IOMMU_DOMAIN_DMA; - ret = iova_reserve_iommu_regions(dev, domain); - -done_unlock: - mutex_unlock(&cookie->mutex); - return ret; + return iova_reserve_iommu_regions(dev, domain); } /** @@ -766,9 +748,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, struct iova_domain *iovad = &cookie->iovad; unsigned long shift, iova_len, iova; - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { - cookie->msi_iova += size; - return cookie->msi_iova - size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) { + domain->msi_cookie->msi_iova += size; + return domain->msi_cookie->msi_iova - size; } shift = iova_shift(iovad); @@ -805,16 +787,16 @@ done: return (dma_addr_t)iova << shift; } -static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, - dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) +static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova, + size_t size, struct iommu_iotlb_gather *gather) { - struct iova_domain *iovad = &cookie->iovad; + struct iova_domain *iovad = &domain->iova_cookie->iovad; /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) - cookie->msi_iova -= size; + if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) + domain->msi_cookie->msi_iova -= size; else if (gather && gather->queued) - queue_iova(cookie, iova_pfn(iovad, iova), + queue_iova(domain->iova_cookie, iova_pfn(iovad, iova), size >> iova_shift(iovad), &gather->freelist); else @@ -842,7 +824,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, if (!iotlb_gather.queued) iommu_iotlb_sync(domain, &iotlb_gather); - iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); + iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather); } static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, @@ -870,7 +852,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, return DMA_MAPPING_ERROR; if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); return DMA_MAPPING_ERROR; } return iova + iova_off; @@ -1007,7 +989,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, out_free_sg: sg_free_table(sgt); out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -1160,6 +1142,54 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } +static phys_addr_t iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iova_domain *iovad = &domain->iova_cookie->iovad; + + if (!is_swiotlb_active(dev)) { + dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + trace_swiotlb_bounced(dev, phys, size); + + phys = swiotlb_tbl_map_single(dev, phys, size, iova_mask(iovad), dir, + attrs); + + /* + * Untrusted devices should not see padding areas with random leftover + * kernel data, so zero the pre- and post-padding. + * swiotlb_tbl_map_single() has initialized the bounce buffer proper to + * the contents of the original memory buffer. + */ + if (phys != (phys_addr_t)DMA_MAPPING_ERROR && dev_is_untrusted(dev)) { + size_t start, virt = (size_t)phys_to_virt(phys); + + /* Pre-padding */ + start = iova_align_down(iovad, virt); + memset((void *)start, 0, virt - start); + + /* Post-padding */ + start = virt + size; + memset((void *)start, 0, iova_align(iovad, start) - start); + } + + return phys; +} + +/* + * Checks if a physical buffer has unaligned boundaries with respect to + * the IOMMU granule. Returns non-zero if either the start or end + * address is not aligned to the granule boundary. + */ +static inline size_t iova_unaligned(struct iova_domain *iovad, phys_addr_t phys, + size_t size) +{ + return iova_offset(iovad, phys | size); +} + dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -1173,42 +1203,14 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, dma_addr_t iova, dma_mask = dma_get_mask(dev); /* - * If both the physical buffer start address and size are - * page aligned, we don't need to use a bounce page. + * If both the physical buffer start address and size are page aligned, + * we don't need to use a bounce page. */ if (dev_use_swiotlb(dev, size, dir) && - iova_offset(iovad, phys | size)) { - if (!is_swiotlb_active(dev)) { - dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); - return DMA_MAPPING_ERROR; - } - - trace_swiotlb_bounced(dev, phys, size); - - phys = swiotlb_tbl_map_single(dev, phys, size, - iova_mask(iovad), dir, attrs); - - if (phys == DMA_MAPPING_ERROR) + iova_unaligned(iovad, phys, size)) { + phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs); + if (phys == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; - - /* - * Untrusted devices should not see padding areas with random - * leftover kernel data, so zero the pre- and post-padding. - * swiotlb_tbl_map_single() has initialized the bounce buffer - * proper to the contents of the original memory buffer. - */ - if (dev_is_untrusted(dev)) { - size_t start, virt = (size_t)phys_to_virt(phys); - - /* Pre-padding */ - start = iova_align_down(iovad, virt); - memset((void *)start, 0, virt - start); - - /* Post-padding */ - start = virt + size; - memset((void *)start, 0, - iova_align(iovad, start) - start); - } } if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) @@ -1382,7 +1384,6 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, struct scatterlist *s, *prev = NULL; int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); @@ -1412,28 +1413,30 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, size_t s_length = s->length; size_t pad_len = (mask - iova_len + 1) & mask; - if (is_pci_p2pdma_page(sg_page(s))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - /* - * iommu_map_sg() will skip this segment as - * it is marked as a bus address, - * __finalise_sg() will copy the dma address - * into the output segment. - */ - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Mapping through host bridge should be - * mapped with regular IOVAs, thus we - * do nothing here and continue below. - */ - break; - default: - ret = -EREMOTEIO; - goto out_restore_sg; - } + switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(s))) { + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Mapping through host bridge should be mapped with + * regular IOVAs, thus we do nothing here and continue + * below. + */ + break; + case PCI_P2PDMA_MAP_NONE: + break; + case PCI_P2PDMA_MAP_BUS_ADDR: + /* + * iommu_map_sg() will skip this segment as it is marked + * as a bus address, __finalise_sg() will copy the dma + * address into the output segment. + */ + s->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state, + sg_phys(s)); + sg_dma_len(s) = sg->length; + sg_dma_mark_bus_address(s); + continue; + default: + ret = -EREMOTEIO; + goto out_restore_sg; } sg_dma_address(s) = s_iova_off; @@ -1484,7 +1487,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, return __finalise_sg(dev, sg, nents, iova); out_free_iova: - iommu_dma_free_iova(cookie, iova, iova_len, NULL); + iommu_dma_free_iova(domain, iova, iova_len, NULL); out_restore_sg: __invalidate_sg(sg, nents); out: @@ -1744,6 +1747,354 @@ size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } +/** + * dma_iova_try_alloc - Try to allocate an IOVA space + * @dev: Device to allocate the IOVA space for + * @state: IOVA state + * @phys: physical address + * @size: IOVA size + * + * Check if @dev supports the IOVA-based DMA API, and if yes allocate IOVA space + * for the given base address and size. + * + * Note: @phys is only used to calculate the IOVA alignment. Callers that always + * do PAGE_SIZE aligned transfers can safely pass 0 here. + * + * Returns %true if the IOVA-based DMA API can be used and IOVA space has been + * allocated, or %false if the regular DMA API should be used. + */ +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size) +{ + struct iommu_dma_cookie *cookie; + struct iommu_domain *domain; + struct iova_domain *iovad; + size_t iova_off; + dma_addr_t addr; + + memset(state, 0, sizeof(*state)); + if (!use_dma_iommu(dev)) + return false; + + domain = iommu_get_dma_domain(dev); + cookie = domain->iova_cookie; + iovad = &cookie->iovad; + iova_off = iova_offset(iovad, phys); + + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, iommu_get_domain_for_dev(dev))) + return false; + + if (WARN_ON_ONCE(!size)) + return false; + + /* + * DMA_IOVA_USE_SWIOTLB is flag which is set by dma-iommu + * internals, make sure that caller didn't set it and/or + * didn't use this interface to map SIZE_MAX. + */ + if (WARN_ON_ONCE((u64)size & DMA_IOVA_USE_SWIOTLB)) + return false; + + addr = iommu_dma_alloc_iova(domain, + iova_align(iovad, size + iova_off), + dma_get_mask(dev), dev); + if (!addr) + return false; + + state->addr = addr + iova_off; + state->__size = size; + return true; +} +EXPORT_SYMBOL_GPL(dma_iova_try_alloc); + +/** + * dma_iova_free - Free an IOVA space + * @dev: Device to free the IOVA space for + * @state: IOVA state + * + * Undoes a successful dma_try_iova_alloc(). + * + * Note that all dma_iova_link() calls need to be undone first. For callers + * that never call dma_iova_unlink(), dma_iova_destroy() can be used instead + * which unlinks all ranges and frees the IOVA space in a single efficient + * operation. + */ +void dma_iova_free(struct device *dev, struct dma_iova_state *state) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, state->addr); + size_t size = dma_iova_size(state); + + iommu_dma_free_iova(domain, state->addr - iova_start_pad, + iova_align(iovad, size + iova_start_pad), NULL); +} +EXPORT_SYMBOL_GPL(dma_iova_free); + +static int __dma_iova_link(struct device *dev, dma_addr_t addr, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + bool coherent = dev_is_dma_coherent(dev); + + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + + return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size, + dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC); +} + +static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr, + phys_addr_t phys, size_t bounce_len, + enum dma_data_direction dir, unsigned long attrs, + size_t iova_start_pad) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iova_domain *iovad = &domain->iova_cookie->iovad; + phys_addr_t bounce_phys; + int error; + + bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs); + if (bounce_phys == DMA_MAPPING_ERROR) + return -ENOMEM; + + error = __dma_iova_link(dev, addr - iova_start_pad, + bounce_phys - iova_start_pad, + iova_align(iovad, bounce_len), dir, attrs); + if (error) + swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir, + attrs); + return error; +} + +static int iommu_dma_iova_link_swiotlb(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, phys); + size_t iova_end_pad = iova_offset(iovad, phys + size); + dma_addr_t addr = state->addr + offset; + size_t mapped = 0; + int error; + + if (iova_start_pad) { + size_t bounce_len = min(size, iovad->granule - iova_start_pad); + + error = iommu_dma_iova_bounce_and_link(dev, addr, phys, + bounce_len, dir, attrs, iova_start_pad); + if (error) + return error; + state->__size |= DMA_IOVA_USE_SWIOTLB; + + mapped += bounce_len; + size -= bounce_len; + if (!size) + return 0; + } + + size -= iova_end_pad; + error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir, + attrs); + if (error) + goto out_unmap; + mapped += size; + + if (iova_end_pad) { + error = iommu_dma_iova_bounce_and_link(dev, addr + mapped, + phys + mapped, iova_end_pad, dir, attrs, 0); + if (error) + goto out_unmap; + state->__size |= DMA_IOVA_USE_SWIOTLB; + } + + return 0; + +out_unmap: + dma_iova_unlink(dev, state, 0, mapped, dir, attrs); + return error; +} + +/** + * dma_iova_link - Link a range of IOVA space + * @dev: DMA device + * @state: IOVA state + * @phys: physical address to link + * @offset: offset into the IOVA state to map into + * @size: size of the buffer + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Link a range of IOVA space for the given IOVA state without IOTLB sync. + * This function is used to link multiple physical addresses in contiguous + * IOVA space without performing costly IOTLB sync. + * + * The caller is responsible to call to dma_iova_sync() to sync IOTLB at + * the end of linkage. + */ +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, phys); + + if (WARN_ON_ONCE(iova_start_pad && offset > 0)) + return -EIO; + + if (dev_use_swiotlb(dev, size, dir) && + iova_unaligned(iovad, phys, size)) + return iommu_dma_iova_link_swiotlb(dev, state, phys, offset, + size, dir, attrs); + + return __dma_iova_link(dev, state->addr + offset - iova_start_pad, + phys - iova_start_pad, + iova_align(iovad, size + iova_start_pad), dir, attrs); +} +EXPORT_SYMBOL_GPL(dma_iova_link); + +/** + * dma_iova_sync - Sync IOTLB + * @dev: DMA device + * @state: IOVA state + * @offset: offset into the IOVA state to sync + * @size: size of the buffer + * + * Sync IOTLB for the given IOVA state. This function should be called on + * the IOVA-contiguous range created by one ore more dma_iova_link() calls + * to sync the IOTLB. + */ +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + dma_addr_t addr = state->addr + offset; + size_t iova_start_pad = iova_offset(iovad, addr); + + return iommu_sync_map(domain, addr - iova_start_pad, + iova_align(iovad, size + iova_start_pad)); +} +EXPORT_SYMBOL_GPL(dma_iova_sync); + +static void iommu_dma_iova_unlink_range_slow(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_start_pad = iova_offset(iovad, addr); + dma_addr_t end = addr + size; + + do { + phys_addr_t phys; + size_t len; + + phys = iommu_iova_to_phys(domain, addr); + if (WARN_ON(!phys)) + /* Something very horrible happen here */ + return; + + len = min_t(size_t, + end - addr, iovad->granule - iova_start_pad); + + if (!dev_is_dma_coherent(dev) && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_cpu(phys, len, dir); + + swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs); + + addr += len; + iova_start_pad = 0; + } while (addr < end); +} + +static void __iommu_dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs, + bool free_iova) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + dma_addr_t addr = state->addr + offset; + size_t iova_start_pad = iova_offset(iovad, addr); + struct iommu_iotlb_gather iotlb_gather; + size_t unmapped; + + if ((state->__size & DMA_IOVA_USE_SWIOTLB) || + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); + + iommu_iotlb_gather_init(&iotlb_gather); + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); + + size = iova_align(iovad, size + iova_start_pad); + addr -= iova_start_pad; + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); + WARN_ON(unmapped != size); + + if (!iotlb_gather.queued) + iommu_iotlb_sync(domain, &iotlb_gather); + if (free_iova) + iommu_dma_free_iova(domain, addr, size, &iotlb_gather); +} + +/** + * dma_iova_unlink - Unlink a range of IOVA space + * @dev: DMA device + * @state: IOVA state + * @offset: offset into the IOVA state to unlink + * @size: size of the buffer + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Unlink a range of IOVA space for the given IOVA state. + */ +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false); +} +EXPORT_SYMBOL_GPL(dma_iova_unlink); + +/** + * dma_iova_destroy - Finish a DMA mapping transaction + * @dev: DMA device + * @state: IOVA state + * @mapped_len: number of bytes to unmap + * @dir: DMA direction + * @attrs: attributes of mapping properties + * + * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The + * range of IOVA from dma_addr to @mapped_len must all be linked, and be the + * only linked IOVA in state. + */ +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs) +{ + if (mapped_len) + __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs, + true); + else + /* + * We can be here if first call to dma_iova_link() failed and + * there is nothing to unlink, so let's be more clear. + */ + dma_iova_free(dev, state); +} +EXPORT_SYMBOL_GPL(dma_iova_destroy); + void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1762,17 +2113,47 @@ out_err: dev->dma_iommu = false; } +static bool has_msi_cookie(const struct iommu_domain *domain) +{ + return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA || + domain->cookie_type == IOMMU_COOKIE_DMA_MSI); +} + +static size_t cookie_msi_granule(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return domain->iova_cookie->iovad.granule; + case IOMMU_COOKIE_DMA_MSI: + return PAGE_SIZE; + default: + BUG(); + } +} + +static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) +{ + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + return &domain->iova_cookie->msi_page_list; + case IOMMU_COOKIE_DMA_MSI: + return &domain->msi_cookie->msi_page_list; + default: + BUG(); + } +} + static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, phys_addr_t msi_addr, struct iommu_domain *domain) { - struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct list_head *msi_page_list = cookie_msi_pages(domain); struct iommu_dma_msi_page *msi_page; dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; - size_t size = cookie_msi_granule(cookie); + size_t size = cookie_msi_granule(domain); msi_addr &= ~(phys_addr_t)(size - 1); - list_for_each_entry(msi_page, &cookie->msi_page_list, list) + list_for_each_entry(msi_page, msi_page_list, list) if (msi_page->phys == msi_addr) return msi_page; @@ -1790,70 +2171,35 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, INIT_LIST_HEAD(&msi_page->list); msi_page->phys = msi_addr; msi_page->iova = iova; - list_add(&msi_page->list, &cookie->msi_page_list); + list_add(&msi_page->list, msi_page_list); return msi_page; out_free_iova: - iommu_dma_free_iova(cookie, iova, size, NULL); + iommu_dma_free_iova(domain, iova, size, NULL); out_free_page: kfree(msi_page); return NULL; } -/** - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain - * @desc: MSI descriptor, will store the MSI page - * @msi_addr: MSI target address to be mapped - * - * Return: 0 on success or negative error code if the mapping failed. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_dma_msi_page *msi_page; - static DEFINE_MUTEX(msi_prepare_lock); /* see below */ + const struct iommu_dma_msi_page *msi_page; - if (!domain || !domain->iova_cookie) { - desc->iommu_cookie = NULL; + if (!has_msi_cookie(domain)) { + msi_desc_set_iommu_msi_iova(desc, 0, 0); return 0; } - /* - * In fact the whole prepare operation should already be serialised by - * irq_domain_mutex further up the callchain, but that's pretty subtle - * on its own, so consider this locking as failsafe documentation... - */ - mutex_lock(&msi_prepare_lock); + iommu_group_mutex_assert(dev); msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); - mutex_unlock(&msi_prepare_lock); - - msi_desc_set_iommu_cookie(desc, msi_page); - if (!msi_page) return -ENOMEM; - return 0; -} -/** - * iommu_dma_compose_msi_msg() - Apply translation to an MSI message - * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() - * @msg: MSI message containing target physical address - */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ - struct device *dev = msi_desc_to_dev(desc); - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - const struct iommu_dma_msi_page *msi_page; - - msi_page = msi_desc_get_iommu_cookie(desc); - - if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) - return; - - msg->address_hi = upper_32_bits(msi_page->iova); - msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; - msg->address_lo += lower_32_bits(msi_page->iova); + msi_desc_set_iommu_msi_iova(desc, msi_page->iova, + ilog2(cookie_msi_granule(domain))); + return 0; } static int iommu_dma_init(void) diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index c12d63457c76..eca201c1f963 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -13,11 +13,15 @@ void iommu_setup_dma_ops(struct device *dev); int iommu_get_dma_cookie(struct iommu_domain *domain); void iommu_put_dma_cookie(struct iommu_domain *domain); +void iommu_put_msi_cookie(struct iommu_domain *domain); int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); +int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); + extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ @@ -40,9 +44,19 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } +static inline void iommu_put_msi_cookie(struct iommu_domain *domain) +{ +} + static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { } +static inline int iommu_dma_sw_msi(struct iommu_domain *domain, + struct msi_desc *desc, phys_addr_t msi_addr) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 317266aca6e2..fcb6a0f7c082 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -902,11 +902,11 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) if (!domain) return NULL; - domain->pgtable = iommu_alloc_pages(GFP_KERNEL, 2); + domain->pgtable = iommu_alloc_pages_sz(GFP_KERNEL, SZ_16K); if (!domain->pgtable) goto err_pgtable; - domain->lv2entcnt = iommu_alloc_pages(GFP_KERNEL, 1); + domain->lv2entcnt = iommu_alloc_pages_sz(GFP_KERNEL, SZ_8K); if (!domain->lv2entcnt) goto err_counter; @@ -932,9 +932,9 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) return &domain->domain; err_lv2ent: - iommu_free_pages(domain->lv2entcnt, 1); + iommu_free_pages(domain->lv2entcnt); err_counter: - iommu_free_pages(domain->pgtable, 2); + iommu_free_pages(domain->pgtable); err_pgtable: kfree(domain); return NULL; @@ -975,8 +975,8 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain) phys_to_virt(base)); } - iommu_free_pages(domain->pgtable, 2); - iommu_free_pages(domain->lv2entcnt, 1); + iommu_free_pages(domain->pgtable); + iommu_free_pages(domain->lv2entcnt); kfree(domain); } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 30be786bff11..5f08523f97cb 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -64,7 +64,7 @@ static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, spin_lock_irqsave(&iommu_lock, flags); ret = pamu_update_paace_stash(liodn, val); if (ret) { - pr_debug("Failed to update SPAACE for liodn %d\n ", liodn); + pr_debug("Failed to update SPAACE for liodn %d\n", liodn); spin_unlock_irqrestore(&iommu_lock, flags); return ret; } diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 2a86aa5d54c6..0961ac805944 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void) x86_init.hyper.msi_ext_dest_id()) return -ENODEV; - if (hv_root_partition) { + if (hv_root_partition()) { name = "HYPERV-ROOT-IR"; ops = &hyperv_root_ir_domain_ops; } else { @@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void) return -ENOMEM; } - if (hv_root_partition) + if (hv_root_partition()) return 0; /* The rest is only relevant to guests */ /* @@ -193,15 +193,13 @@ struct hyperv_root_ir_data { static void hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) { - u64 status; - u32 vector; - struct irq_cfg *cfg; - int ioapic_id; - const struct cpumask *affinity; - int cpu; - struct hv_interrupt_entry entry; struct hyperv_root_ir_data *data = irq_data->chip_data; + struct hv_interrupt_entry entry; + const struct cpumask *affinity; struct IO_APIC_route_entry e; + struct irq_cfg *cfg; + int cpu, ioapic_id; + u32 vector; cfg = irqd_cfg(irq_data); affinity = irq_data_get_effective_affinity_mask(irq_data); @@ -214,23 +212,16 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) && data->entry.ioapic_rte.as_uint64) { entry = data->entry; - status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); - - if (status != HV_STATUS_SUCCESS) - pr_debug("%s: unexpected unmap status %lld\n", __func__, status); + (void)hv_unmap_ioapic_interrupt(ioapic_id, &entry); data->entry.ioapic_rte.as_uint64 = 0; data->entry.source = 0; /* Invalid source */ } - status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, - vector, &entry); - - if (status != HV_STATUS_SUCCESS) { - pr_err("%s: map hypercall failed, status %lld\n", __func__, status); + if (hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, + vector, &entry)) return; - } data->entry = entry; @@ -322,10 +313,10 @@ static void hyperv_root_irq_remapping_free(struct irq_domain *domain, data = irq_data->chip_data; e = &data->entry; - if (e->source == HV_DEVICE_TYPE_IOAPIC - && e->ioapic_rte.as_uint64) - hv_unmap_ioapic_interrupt(data->ioapic_id, - &data->entry); + if (e->source == HV_DEVICE_TYPE_IOAPIC && + e->ioapic_rte.as_uint64) + (void)hv_unmap_ioapic_interrupt(data->ioapic_id, + &data->entry); kfree(data); } diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index 6c7528130cf9..ada651c4a01b 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -1,11 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_DMAR_TABLE) += dmar.o -obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o -obj-$(CONFIG_DMAR_TABLE) += trace.o +obj-y += iommu.o pasid.o nested.o cache.o prq.o +obj-$(CONFIG_DMAR_TABLE) += dmar.o trace.o obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o -ifdef CONFIG_INTEL_IOMMU obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o -endif obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..c8b79de84d3f 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. */ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; @@ -423,22 +422,6 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_ domain->qi_batch); } -static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) -{ - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); - if (info->dtlb_extra_inval) - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); -} - /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. @@ -509,7 +492,7 @@ void cache_tag_flush_all(struct dmar_domain *domain) break; case CACHE_TAG_DEVTLB: case CACHE_TAG_NESTING_DEVTLB: - cache_tag_flush_devtlb_all(domain, tag); + cache_tag_flush_devtlb_psi(domain, tag, 0, MAX_AGAW_PFN_WIDTH); break; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index e540092d664d..b61d9ea27aa9 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1099,6 +1099,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) spin_lock_init(&iommu->device_rbtree_lock); mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; + spin_lock_init(&iommu->lock); + ida_init(&iommu->domain_ida); + mutex_init(&iommu->did_lock); ver = readl(iommu->reg + DMAR_VER_REG); pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", @@ -1187,7 +1190,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - iommu_free_page(iommu->qi->desc); + iommu_free_pages(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1195,6 +1198,7 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->reg) unmap_iommu(iommu); + ida_destroy(&iommu->domain_ida); ida_free(&dmar_seq_ids, iommu->seq_id); kfree(iommu); } @@ -1681,7 +1685,6 @@ int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; void *desc; - int order; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1702,8 +1705,9 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - order = ecap_smts(iommu->ecap) ? 1 : 0; - desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order); + desc = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, + ecap_smts(iommu->ecap) ? SZ_8K : + SZ_4K); if (!desc) { kfree(qi); iommu->qi = NULL; @@ -1714,7 +1718,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - iommu_free_page(qi->desc); + iommu_free_pages(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 76417bd5e926..ab4cd742f095 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -397,7 +397,8 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); + context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, + SZ_4K); if (!context) return NULL; @@ -571,17 +572,17 @@ static void free_context_table(struct intel_iommu *iommu) for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) - iommu_free_page(context); + iommu_free_pages(context); if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); if (context) - iommu_free_page(context); + iommu_free_pages(context); } - iommu_free_page(iommu->root_entry); + iommu_free_pages(iommu->root_entry); iommu->root_entry = NULL; } @@ -731,20 +732,22 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval, tmp; - tmp_page = iommu_alloc_page_node(domain->nid, gfp); + tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp, + SZ_4K); if (!tmp_page) return NULL; domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); - pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + pteval = virt_to_phys(tmp_page) | DMA_PTE_READ | + DMA_PTE_WRITE; if (domain->use_first_level) pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; tmp = 0ULL; if (!try_cmpxchg64(&pte->val, &tmp, pteval)) /* Someone else set it while we were thinking; use theirs. */ - iommu_free_page(tmp_page); + iommu_free_pages(tmp_page); else domain_flush_cache(domain, pte, sizeof(*pte)); } @@ -857,7 +860,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - iommu_free_page(level_pte); + iommu_free_pages(level_pte); } next: pfn += level_size(level); @@ -881,7 +884,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - iommu_free_page(domain->pgd); + iommu_free_pages(domain->pgd); domain->pgd = NULL; } } @@ -893,18 +896,16 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, The 'pte' argument is the *parent* PTE, pointing to the page that is to be freed. */ static void dma_pte_list_pagetables(struct dmar_domain *domain, - int level, struct dma_pte *pte, - struct list_head *freelist) + int level, struct dma_pte *parent_pte, + struct iommu_pages_list *freelist) { - struct page *pg; + struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte)); - pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); - list_add_tail(&pg->lru, freelist); + iommu_pages_list_add(freelist, pte); if (level == 1) return; - pte = page_address(pg); do { if (dma_pte_present(pte) && !dma_pte_superpage(pte)) dma_pte_list_pagetables(domain, level - 1, pte, freelist); @@ -915,7 +916,7 @@ static void dma_pte_list_pagetables(struct dmar_domain *domain, static void dma_pte_clear_level(struct dmar_domain *domain, int level, struct dma_pte *pte, unsigned long pfn, unsigned long start_pfn, unsigned long last_pfn, - struct list_head *freelist) + struct iommu_pages_list *freelist) { struct dma_pte *first_pte = NULL, *last_pte = NULL; @@ -960,7 +961,8 @@ next: the page tables, and may have cached the intermediate levels. The pages can only be freed after the IOTLB flush has been done. */ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, - unsigned long last_pfn, struct list_head *freelist) + unsigned long last_pfn, + struct iommu_pages_list *freelist) { if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) || WARN_ON(start_pfn > last_pfn)) @@ -972,8 +974,7 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - struct page *pgd_page = virt_to_page(domain->pgd); - list_add_tail(&pgd_page->lru, freelist); + iommu_pages_list_add(freelist, domain->pgd); domain->pgd = NULL; } } @@ -983,7 +984,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); + root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -1172,32 +1173,59 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) return true; } -static void iommu_enable_pci_caps(struct device_domain_info *info) +static void iommu_enable_pci_ats(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_supported) return; pdev = to_pci_dev(info->dev); - if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) + if (!pci_ats_page_aligned(pdev)) + return; + + if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; } -static void iommu_disable_pci_caps(struct device_domain_info *info) +static void iommu_disable_pci_ats(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + pci_disable_ats(to_pci_dev(info->dev)); + info->ats_enabled = 0; +} + +static void iommu_enable_pci_pri(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_enabled || !info->pri_supported) return; pdev = to_pci_dev(info->dev); + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return; - if (info->ats_enabled) { - pci_disable_ats(pdev); - info->ats_enabled = 0; - } + if (pci_reset_pri(pdev)) + return; + + if (!pci_enable_pri(pdev, PRQ_DEPTH)) + info->pri_enabled = 1; +} + +static void iommu_disable_pci_pri(struct device_domain_info *info) +{ + if (!info->pri_enabled) + return; + + if (WARN_ON(info->iopf_refcount)) + iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); + + pci_disable_pri(to_pci_dev(info->dev)); + info->pri_enabled = 0; } static void intel_flush_iotlb_all(struct iommu_domain *domain) @@ -1261,52 +1289,13 @@ static void iommu_disable_translation(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -static int iommu_init_domains(struct intel_iommu *iommu) -{ - u32 ndomains; - - ndomains = cap_ndoms(iommu->cap); - pr_debug("%s: Number of Domains supported <%d>\n", - iommu->name, ndomains); - - spin_lock_init(&iommu->lock); - - iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL); - if (!iommu->domain_ids) - return -ENOMEM; - - /* - * If Caching mode is set, then invalid translations are tagged - * with domain-id 0, hence we need to pre-allocate it. We also - * use domain-id 0 as a marker for non-allocated domain-id, so - * make sure it is not used for a real domain. - */ - set_bit(0, iommu->domain_ids); - - /* - * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid - * entry for first-level or pass-through translation modes should - * be programmed with a domain id different from those used for - * second-level or nested translation. We reserve a domain id for - * this purpose. This domain id is also used for identity domain - * in legacy mode. - */ - set_bit(FLPT_DEFAULT_DID, iommu->domain_ids); - - return 0; -} - static void disable_dmar_iommu(struct intel_iommu *iommu) { - if (!iommu->domain_ids) - return; - /* * All iommu domains must have been detached from the devices, * hence there should be no domain IDs in use. */ - if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap)) - > NUM_RESERVED_DID)) + if (WARN_ON(!ida_is_empty(&iommu->domain_ida))) return; if (iommu->gcmd & DMA_GCMD_TE) @@ -1315,11 +1304,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { - if (iommu->domain_ids) { - bitmap_free(iommu->domain_ids); - iommu->domain_ids = NULL; - } - if (iommu->copied_tables) { bitmap_free(iommu->copied_tables); iommu->copied_tables = NULL; @@ -1352,7 +1336,6 @@ static bool first_level_by_default(struct intel_iommu *iommu) int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { struct iommu_domain_info *info, *curr; - unsigned long ndomains; int num, ret = -ENOSPC; if (domain->domain.type == IOMMU_DOMAIN_SVA) @@ -1362,40 +1345,36 @@ int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) if (!info) return -ENOMEM; - spin_lock(&iommu->lock); + guard(mutex)(&iommu->did_lock); curr = xa_load(&domain->iommu_array, iommu->seq_id); if (curr) { curr->refcnt++; - spin_unlock(&iommu->lock); kfree(info); return 0; } - ndomains = cap_ndoms(iommu->cap); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { + num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID, + cap_ndoms(iommu->cap) - 1, GFP_KERNEL); + if (num < 0) { pr_err("%s: No free domain ids\n", iommu->name); goto err_unlock; } - set_bit(num, iommu->domain_ids); info->refcnt = 1; info->did = num; info->iommu = iommu; curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id, - NULL, info, GFP_ATOMIC); + NULL, info, GFP_KERNEL); if (curr) { ret = xa_err(curr) ? : -EBUSY; goto err_clear; } - spin_unlock(&iommu->lock); return 0; err_clear: - clear_bit(info->did, iommu->domain_ids); + ida_free(&iommu->domain_ida, info->did); err_unlock: - spin_unlock(&iommu->lock); kfree(info); return ret; } @@ -1407,21 +1386,20 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) if (domain->domain.type == IOMMU_DOMAIN_SVA) return; - spin_lock(&iommu->lock); + guard(mutex)(&iommu->did_lock); info = xa_load(&domain->iommu_array, iommu->seq_id); if (--info->refcnt == 0) { - clear_bit(info->did, iommu->domain_ids); + ida_free(&iommu->domain_ida, info->did); xa_erase(&domain->iommu_array, iommu->seq_id); - domain->nid = NUMA_NO_NODE; kfree(info); } - spin_unlock(&iommu->lock); } static void domain_exit(struct dmar_domain *domain) { if (domain->pgd) { - LIST_HEAD(freelist); + struct iommu_pages_list freelist = + IOMMU_PAGES_LIST_INIT(freelist); domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); iommu_put_pages_list(&freelist); @@ -1556,12 +1534,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; + int ret; if (!dev_is_pci(dev)) return domain_context_mapping_one(domain, iommu, bus, devfn); - return pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapping_cb, domain); + ret = pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapping_cb, domain); + if (ret) + return ret; + + iommu_enable_pci_ats(info); + + return 0; } /* Return largest possible superpage level for a given mapping */ @@ -1646,9 +1631,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); - attr |= DMA_FL_PTE_PRESENT; if (domain->use_first_level) { - attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; } @@ -1748,7 +1732,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, did, true); + intel_context_flush_no_pasid(info, context, did); } int __domain_setup_first_level(struct intel_iommu *iommu, @@ -1811,6 +1795,18 @@ static int domain_setup_first_level(struct intel_iommu *iommu, (pgd_t *)pgd, flags, old); } +static bool domain_need_iotlb_sync_map(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + if (cap_caching_mode(iommu->cap) && !domain->use_first_level) + return true; + + if (rwbf_quirk || cap_rwbf(iommu->cap)) + return true; + + return false; +} + static int dmar_domain_attach_device(struct dmar_domain *domain, struct device *dev) { @@ -1824,6 +1820,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return ret; info->domain = domain; + info->domain_attached = true; spin_lock_irqsave(&domain->lock, flags); list_add(&info->link, &domain->devices); spin_unlock_irqrestore(&domain->lock, flags); @@ -1843,12 +1840,12 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - iommu_enable_pci_caps(info); - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) goto out_block_translation; + domain->iotlb_sync_map |= domain_need_iotlb_sync_map(domain, iommu); + return 0; out_block_translation: @@ -1994,7 +1991,8 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL); + new_ce = iommu_alloc_pages_node_sz(iommu->node, + GFP_KERNEL, SZ_4K); if (!new_ce) goto out_unmap; @@ -2009,7 +2007,7 @@ static int copy_context_table(struct intel_iommu *iommu, did = context_domain_id(&ce); if (did >= 0 && did < cap_ndoms(iommu->cap)) - set_bit(did, iommu->domain_ids); + ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL); set_context_copied(iommu, bus, devfn); new_ce[idx] = ce; @@ -2136,11 +2134,6 @@ static int __init init_dmars(void) } intel_iommu_init_qi(iommu); - - ret = iommu_init_domains(iommu); - if (ret) - goto free_iommu; - init_translation_status(iommu); if (translation_pre_enabled(iommu) && !is_kdump_kernel()) { @@ -2618,9 +2611,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (iommu->gcmd & DMA_GCMD_TE) iommu_disable_translation(iommu); - ret = iommu_init_domains(iommu); - if (ret == 0) - ret = iommu_alloc_root_entry(iommu); + ret = iommu_alloc_root_entry(iommu); if (ret) goto out; @@ -2711,7 +2702,6 @@ static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev) struct device *tmp; int i; - dev = pci_physfn(dev); rcu_read_lock(); list_for_each_entry_rcu(satcu, &dmar_satc_units, list) { @@ -2728,15 +2718,16 @@ out: return satcu; } -static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) +static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) { - int i, ret = 1; - struct pci_bus *bus; struct pci_dev *bridge = NULL; - struct device *tmp; - struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; struct dmar_satc_unit *satcu; + struct acpi_dmar_atsr *atsr; + bool supported = true; + struct pci_bus *bus; + struct device *tmp; + int i; dev = pci_physfn(dev); satcu = dmar_find_matched_satc_unit(dev); @@ -2754,11 +2745,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) bridge = bus->self; /* If it's an integrated device, allow ATS */ if (!bridge) - return 1; + return true; /* Connected via non-PCIe: no ATS */ if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; + return false; /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; @@ -2777,11 +2768,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu) if (atsru->include_all) goto out; } - ret = 0; + supported = false; out: rcu_read_unlock(); - return ret; + return supported; } int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) @@ -2939,9 +2930,14 @@ static ssize_t domains_used_show(struct device *dev, struct device_attribute *attr, char *buf) { struct intel_iommu *iommu = dev_to_intel_iommu(dev); - return sysfs_emit(buf, "%d\n", - bitmap_weight(iommu->domain_ids, - cap_ndoms(iommu->cap))); + unsigned int count = 0; + int id; + + for (id = 0; id < cap_ndoms(iommu->cap); id++) + if (ida_exists(&iommu->domain_ida, id)) + count++; + + return sysfs_emit(buf, "%d\n", count); } static DEVICE_ATTR_RO(domains_used); @@ -3210,6 +3206,7 @@ static void domain_context_clear(struct device_domain_info *info) pci_for_each_dma_alias(to_pci_dev(info->dev), &domain_context_clear_one_cb, info); + iommu_disable_pci_ats(info); } /* @@ -3223,10 +3220,13 @@ void device_block_translation(struct device *dev) struct intel_iommu *iommu = info->iommu; unsigned long flags; + /* Device in DMA blocking state. Noting to do. */ + if (!info->domain_attached) + return; + if (info->domain) cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); - iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, @@ -3235,6 +3235,9 @@ void device_block_translation(struct device *dev) domain_context_clear(info); } + /* Device now in DMA blocking state. */ + info->domain_attached = false; + if (!info->domain) return; @@ -3249,6 +3252,9 @@ void device_block_translation(struct device *dev) static int blocking_domain_attach_dev(struct iommu_domain *domain, struct device *dev) { + struct device_domain_info *info = dev_iommu_priv_get(dev); + + iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev); device_block_translation(dev); return 0; } @@ -3327,7 +3333,7 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); /* always allocate the top pgd */ - domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL); + domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K); if (!domain->pgd) { kfree(domain); return ERR_PTR(-ENOMEM); @@ -3350,7 +3356,8 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, bool first_stage; if (flags & - (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | + IOMMU_HWPT_ALLOC_PASID))) return ERR_PTR(-EOPNOTSUPP); if (nested_parent && !nested_supported(iommu)) return ERR_PTR(-EOPNOTSUPP); @@ -3458,7 +3465,15 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (ret) return ret; - return dmar_domain_attach_device(to_dmar_domain(domain), dev); + ret = iopf_for_domain_set(domain, dev); + if (ret) + return ret; + + ret = dmar_domain_attach_device(to_dmar_domain(domain), dev); + if (ret) + iopf_for_domain_remove(domain, dev); + + return ret; } static int intel_iommu_map(struct iommu_domain *domain, @@ -3569,7 +3584,8 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { cache_tag_flush_range(to_dmar_domain(domain), gather->start, - gather->end, list_empty(&gather->freelist)); + gather->end, + iommu_pages_list_empty(&gather->freelist)); iommu_put_pages_list(&gather->freelist); } @@ -3751,16 +3767,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); - /* - * The PCIe spec, in its wisdom, declares that the behaviour of the - * device is undefined if you enable PASID support after ATS support. - * So always enable PASID support on devices which have it, even if - * we can't yet know if we're ever going to use it. - */ - if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) - info->pasid_enabled = 1; - return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3772,11 +3778,43 @@ free: return ERR_PTR(ret); } +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + + /* + * The PCIe spec, in its wisdom, declares that the behaviour of the + * device is undefined if you enable PASID support after ATS support. + * So always enable PASID support on devices which have it, even if + * we can't yet know if we're ever going to use it. + */ + if (info->pasid_supported && + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { + iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } + iommu_enable_pci_pri(info); +} + static void intel_iommu_release_device(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + iommu_disable_pci_pri(info); + iommu_disable_pci_ats(info); + if (info->pasid_enabled) { pci_disable_pasid(to_pci_dev(dev)); info->pasid_enabled = 0; @@ -3794,7 +3832,6 @@ static void intel_iommu_release_device(struct device *dev) intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); - set_dma_ops(dev, NULL); } static void intel_iommu_get_resv_regions(struct device *device, @@ -3863,181 +3900,44 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -static int intel_iommu_enable_sva(struct device *dev) +int intel_iommu_enable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; - - if (!info || dmar_disabled) - return -EINVAL; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) - return -ENODEV; - - if (!info->pasid_enabled || !info->ats_enabled) - return -EINVAL; - - /* - * Devices having device-specific I/O fault handling should not - * support PCI/PRI. The IOMMU side has no means to check the - * capability of device-specific IOPF. Therefore, IOMMU can only - * default that if the device driver enables SVA on a non-PRI - * device, it will handle IOPF in its own way. - */ - if (!info->pri_supported) - return 0; - - /* Devices supporting PRI should have it enabled. */ - if (!info->pri_enabled) - return -EINVAL; - - return 0; -} - -static int context_flip_pri(struct device_domain_info *info, bool enable) -{ struct intel_iommu *iommu = info->iommu; - u8 bus = info->bus, devfn = info->devfn; - struct context_entry *context; - u16 did; - - spin_lock(&iommu->lock); - if (context_copied(iommu, bus, devfn)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - context = iommu_context_addr(iommu, bus, devfn, false); - if (!context || !context_present(context)) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - did = context_domain_id(context); - - if (enable) - context_set_sm_pre(context); - else - context_clear_sm_pre(context); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, did, true); - spin_unlock(&iommu->lock); - - return 0; -} - -static int intel_iommu_enable_iopf(struct device *dev) -{ - struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; int ret; - if (!pdev || !info || !info->ats_enabled || !info->pri_supported) + if (!info->pri_enabled) return -ENODEV; - if (info->pri_enabled) - return -EBUSY; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - /* PASID is required in PRG Response Message. */ - if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) - return -EINVAL; - - ret = pci_reset_pri(pdev); - if (ret) - return ret; + /* pri_enabled is protected by the group mutex. */ + iommu_group_mutex_assert(dev); + if (info->iopf_refcount) { + info->iopf_refcount++; + return 0; + } ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; - ret = context_flip_pri(info, true); - if (ret) - goto err_remove_device; - - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto err_clear_pri; - - info->pri_enabled = 1; + info->iopf_refcount = 1; return 0; -err_clear_pri: - context_flip_pri(info, false); -err_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } -static int intel_iommu_disable_iopf(struct device *dev) +void intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - if (!info->pri_enabled) - return -EINVAL; + if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) + return; - /* Disable new PRI reception: */ - context_flip_pri(info, false); + iommu_group_mutex_assert(dev); + if (--info->iopf_refcount) + return; - /* - * Remove device from fault queue and acknowledge all outstanding - * PRQs to the device: - */ iopf_queue_remove_device(iommu->iopf_queue, dev); - - /* - * PCIe spec states that by clearing PRI enable bit, the Page - * Request Interface will not issue new page requests, but has - * outstanding page requests that have been transmitted or are - * queued for transmission. This is supposed to be called after - * the device driver has stopped DMA, all PASIDs have been - * unbound and the outstanding PRQs have been drained. - */ - pci_disable_pri(to_pci_dev(dev)); - info->pri_enabled = 0; - - return 0; -} - -static int -intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) -{ - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return intel_iommu_enable_iopf(dev); - - case IOMMU_DEV_FEAT_SVA: - return intel_iommu_enable_sva(dev); - - default: - return -ENODEV; - } -} - -static int -intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) -{ - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - return intel_iommu_disable_iopf(dev); - - case IOMMU_DEV_FEAT_SVA: - return 0; - - default: - return -ENODEV; - } } static bool intel_iommu_is_attach_deferred(struct device *dev) @@ -4067,7 +3967,10 @@ static bool risky_device(struct pci_dev *pdev) static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + if (dmar_domain->iotlb_sync_map) + cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); return 0; } @@ -4114,6 +4017,7 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); + iopf_for_domain_remove(old, dev); domain_remove_dev_pasid(old, dev, pasid); return 0; @@ -4186,6 +4090,10 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) + goto out_remove_dev_pasid; + if (dmar_domain->use_first_level) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid, old); @@ -4193,7 +4101,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, ret = domain_setup_second_level(iommu, dmar_domain, dev, pasid, old); if (ret) - goto out_remove_dev_pasid; + goto out_unwind_iopf; domain_remove_dev_pasid(old, dev, pasid); @@ -4201,6 +4109,8 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, return 0; +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; @@ -4415,13 +4325,18 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device if (dev_is_real_dma_subdevice(dev)) return 0; - if (sm_supported(iommu)) { + /* + * No PRI support with the global identity domain. No need to enable or + * disable PRI in this path as the iommu has been put in the blocking + * state. + */ + if (sm_supported(iommu)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); - if (!ret) - iommu_enable_pci_caps(info); - } else { + else ret = device_setup_pass_through(dev); - } + + if (!ret) + info->domain_attached = true; return ret; } @@ -4437,10 +4352,16 @@ static int identity_domain_set_dev_pasid(struct iommu_domain *domain, if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) return -EOPNOTSUPP; - ret = domain_setup_passthrough(iommu, dev, pasid, old); + ret = iopf_for_domain_replace(domain, old, dev); if (ret) return ret; + ret = domain_setup_passthrough(iommu, dev, pasid, old); + if (ret) { + iopf_for_domain_replace(old, domain, dev); + return ret; + } + domain_remove_dev_pasid(old, dev, pasid); return 0; } @@ -4463,11 +4384,10 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, - .dev_enable_feat = intel_iommu_dev_enable_feat, - .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, .pgsize_bitmap = SZ_4K, @@ -4504,6 +4424,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4581,7 +4504,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 6ea7bbe26b19..61f42802fe9e 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -493,14 +493,13 @@ struct q_inval { /* Page Request Queue depth */ #define PRQ_ORDER 4 -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) +#define PRQ_SIZE (SZ_4K << PRQ_ORDER) +#define PRQ_RING_MASK (PRQ_SIZE - 0x20) +#define PRQ_DEPTH (PRQ_SIZE >> 5) struct dmar_pci_notify_info; #ifdef CONFIG_IRQ_REMAP -/* 1MB - maximum possible interrupt remapping table size */ -#define INTR_REMAP_PAGE_ORDER 8 #define INTR_REMAP_TABLE_REG_SIZE 0xf #define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf @@ -615,6 +614,9 @@ struct dmar_domain { u8 has_mappings:1; /* Has mappings configured through * iommu_map() interface. */ + u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write + * buffer when creating mappings. + */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -722,7 +724,9 @@ struct intel_iommu { unsigned char name[16]; /* Device Name */ #ifdef CONFIG_INTEL_IOMMU - unsigned long *domain_ids; /* bitmap of domains */ + /* mutex to protect domain_ida */ + struct mutex did_lock; + struct ida domain_ida; /* domain id allocator */ unsigned long *copied_tables; /* bitmap of copied tables */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ @@ -773,7 +777,9 @@ struct device_domain_info { u8 ats_supported:1; u8 ats_enabled:1; u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ + u8 domain_attached:1; /* Device has domain attached */ u8 ats_qdep; + unsigned int iopf_refcount; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ @@ -808,11 +814,22 @@ static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) } /* - * Domain ID reserved for pasid entries programmed for first-level - * only and pass-through transfer modes. + * Domain ID 0 and 1 are reserved: + * + * If Caching mode is set, then invalid translations are tagged + * with domain-id 0, hence we need to pre-allocate it. We also + * use domain-id 0 as a marker for non-allocated domain-id, so + * make sure it is not used for a real domain. + * + * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid + * entry for first-level or pass-through translation modes should + * be programmed with a domain id different from those used for + * second-level or nested translation. We reserve a domain id for + * this purpose. This domain id is also used for identity domain + * in legacy mode. */ #define FLPT_DEFAULT_DID 1 -#define NUM_RESERVED_DID 2 +#define IDA_START_DID 2 /* Retrieve the domain ID which has allocated to the domain */ static inline u16 @@ -953,25 +970,6 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } -/* VT-d pages must always be _smaller_ than MM pages. Otherwise things - are never going to work. */ -static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) -{ - return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); -} -static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) -{ - return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; -} -static inline unsigned long page_to_dma_pfn(struct page *pg) -{ - return mm_to_dma_pfn_start(page_to_pfn(pg)); -} -static inline unsigned long virt_to_dma_pfn(void *p) -{ - return page_to_dma_pfn(virt_to_page(p)); -} - static inline void context_set_present(struct context_entry *context) { context->lo |= 1; @@ -1294,6 +1292,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, @@ -1304,9 +1304,8 @@ void cache_tag_flush_all(struct dmar_domain *domain); void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end); -void intel_context_flush_present(struct device_domain_info *info, - struct context_entry *context, - u16 did, bool affect_domains); +void intel_context_flush_no_pasid(struct device_domain_info *info, + struct context_entry *context, u16 did); int intel_iommu_enable_prq(struct intel_iommu *iommu); int intel_iommu_finish_prq(struct intel_iommu *iommu); @@ -1314,6 +1313,42 @@ void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); +int intel_iommu_enable_iopf(struct device *dev); +void intel_iommu_disable_iopf(struct device *dev); + +static inline int iopf_for_domain_set(struct iommu_domain *domain, + struct device *dev) +{ + if (!domain || !domain->iopf_handler) + return 0; + + return intel_iommu_enable_iopf(dev); +} + +static inline void iopf_for_domain_remove(struct iommu_domain *domain, + struct device *dev) +{ + if (!domain || !domain->iopf_handler) + return; + + intel_iommu_disable_iopf(dev); +} + +static inline int iopf_for_domain_replace(struct iommu_domain *new, + struct iommu_domain *old, + struct device *dev) +{ + int ret; + + ret = iopf_for_domain_set(new, dev); + if (ret) + return ret; + + iopf_for_domain_remove(old, dev); + + return 0; +} + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); struct iommu_domain *intel_svm_domain_alloc(struct device *dev, diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 3bc2a03cceca..cf7b6882ec75 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -530,11 +530,11 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (!ir_table) return -ENOMEM; - ir_table_base = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, - INTR_REMAP_PAGE_ORDER); + /* 1MB - maximum possible interrupt remapping table size */ + ir_table_base = + iommu_alloc_pages_node_sz(iommu->node, GFP_KERNEL, SZ_1M); if (!ir_table_base) { - pr_err("IR%d: failed to allocate pages of order %d\n", - iommu->seq_id, INTR_REMAP_PAGE_ORDER); + pr_err("IR%d: failed to allocate 1M of pages\n", iommu->seq_id); goto out_free_table; } @@ -612,7 +612,7 @@ out_free_fwnode: out_free_bitmap: bitmap_free(bitmap); out_free_pages: - iommu_free_pages(ir_table_base, INTR_REMAP_PAGE_ORDER); + iommu_free_pages(ir_table_base); out_free_table: kfree(ir_table); @@ -633,7 +633,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } - iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); + iommu_free_pages(iommu->ir_table->base); bitmap_free(iommu->ir_table->bitmap); kfree(iommu->ir_table); iommu->ir_table = NULL; diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index aba92c00b427..fc312f649f9e 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, unsigned long flags; int ret = 0; - if (info->domain) - device_block_translation(dev); + device_block_translation(dev); if (iommu->agaw < dmar_domain->s2_domain->agaw) { dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n"); @@ -56,17 +55,24 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, if (ret) goto detach_iommu; + ret = iopf_for_domain_set(domain, dev); + if (ret) + goto unassign_tag; + ret = intel_pasid_setup_nested(iommu, dev, IOMMU_NO_PASID, dmar_domain); if (ret) - goto unassign_tag; + goto disable_iopf; info->domain = dmar_domain; + info->domain_attached = true; spin_lock_irqsave(&dmar_domain->lock, flags); list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); return 0; +disable_iopf: + iopf_for_domain_remove(domain, dev); unassign_tag: cache_tag_unassign_domain(dmar_domain, dev, IOMMU_NO_PASID); detach_iommu: @@ -166,14 +172,20 @@ static int intel_nested_set_dev_pasid(struct iommu_domain *domain, if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); - ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old); + ret = iopf_for_domain_replace(domain, old, dev); if (ret) goto out_remove_dev_pasid; + ret = domain_setup_nested(iommu, dmar_domain, dev, pasid, old); + if (ret) + goto out_unwind_iopf; + domain_remove_dev_pasid(old, dev, pasid); return 0; +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; @@ -198,7 +210,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, struct dmar_domain *domain; int ret; - if (!nested_supported(iommu) || flags) + if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); /* Must be nested domain */ diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb59a7d35958..ac67a056b6c8 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -60,14 +60,14 @@ int intel_pasid_alloc_table(struct device *dev) size = max_pasid >> (PASID_PDE_SHIFT - 3); order = size ? get_order(size) : 0; - dir = iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order); + dir = iommu_alloc_pages_node_sz(info->iommu->node, GFP_KERNEL, + 1 << (order + PAGE_SHIFT)); if (!dir) { kfree(pasid_table); return -ENOMEM; } pasid_table->table = dir; - pasid_table->order = order; pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); info->pasid_table = pasid_table; @@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev) max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; for (i = 0; i < max_pde; i++) { table = get_pasid_table_from_pde(&dir[i]); - iommu_free_page(table); + iommu_free_pages(table); } - iommu_free_pages(pasid_table->table, pasid_table->order); + iommu_free_pages(pasid_table->table); kfree(pasid_table); } @@ -148,7 +148,8 @@ retry: if (!entries) { u64 tmp; - entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); + entries = iommu_alloc_pages_node_sz(info->iommu->node, + GFP_ATOMIC, SZ_4K); if (!entries) return NULL; @@ -161,7 +162,7 @@ retry: tmp = 0ULL; if (!try_cmpxchg64(&dir[dir_index].val, &tmp, (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { - iommu_free_page(entries); + iommu_free_pages(entries); goto retry; } if (!ecap_coherent(info->iommu->ecap)) { @@ -932,7 +933,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, did, false); + intel_context_flush_no_pasid(info, context, did); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -992,6 +993,8 @@ static int context_entry_set_pasid_table(struct context_entry *context, context_set_sm_dte(context); if (info->pasid_supported) context_set_pasid(context); + if (info->pri_supported) + context_set_sm_pre(context); context_set_fault_enable(context); context_set_present(context); @@ -1117,17 +1120,15 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) /* * Cache invalidations after change in a context table entry that was present - * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If - * IOMMU is in scalable mode and all PASID table entries of the device were - * non-present, set flush_domains to false. Otherwise, true. + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). + * This helper can only be used when IOMMU is working in the legacy mode or + * IOMMU is in scalable mode but all PASID table entries of the device are + * non-present. */ -void intel_context_flush_present(struct device_domain_info *info, - struct context_entry *context, - u16 did, bool flush_domains) +void intel_context_flush_no_pasid(struct device_domain_info *info, + struct context_entry *context, u16 did) { struct intel_iommu *iommu = info->iommu; - struct pasid_entry *pte; - int i; /* * Device-selective context-cache invalidation. The Domain-ID field @@ -1150,30 +1151,5 @@ void intel_context_flush_present(struct device_domain_info *info, return; } - /* - * For scalable mode: - * - Domain-selective PASID-cache invalidation to affected domains - * - Domain-selective IOTLB invalidation to affected domains - * - Global Device-TLB invalidation to affected functions - */ - if (flush_domains) { - /* - * If the IOMMU is running in scalable mode and there might - * be potential PASID translations, the caller should hold - * the lock to ensure that context changes and cache flushes - * are atomic. - */ - assert_spin_locked(&iommu->lock); - for (i = 0; i < info->pasid_table->max_pasid; i++) { - pte = intel_pasid_get_entry(info->dev, i); - if (!pte || !pasid_pte_is_present(pte)) - continue; - - did = pasid_get_domain_id(pte); - qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - } - } - __context_flush_dev_iotlb(info); } diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 668d8ece6b14..fd0fd1a0df84 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -47,7 +47,6 @@ struct pasid_entry { /* The representative of a PASID table */ struct pasid_table { void *table; /* pasid table pointer */ - int order; /* page order of pasid table */ u32 max_pasid; /* max pasid */ }; diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c index 064194399b38..52570e42a14c 100644 --- a/drivers/iommu/intel/prq.c +++ b/drivers/iommu/intel/prq.c @@ -67,7 +67,7 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid) u16 sid, did; info = dev_iommu_priv_get(dev); - if (!info->pri_enabled) + if (!info->iopf_refcount) return; iommu = info->iommu; @@ -290,7 +290,8 @@ int intel_iommu_enable_prq(struct intel_iommu *iommu) struct iopf_queue *iopfq; int irq, ret; - iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER); + iommu->prq = + iommu_alloc_pages_node_sz(iommu->node, GFP_KERNEL, PRQ_SIZE); if (!iommu->prq) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); @@ -340,7 +341,7 @@ free_hwirq: dmar_free_hwirq(irq); iommu->pr_irq = 0; free_prq: - iommu_free_pages(iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq); iommu->prq = NULL; return ret; @@ -363,7 +364,7 @@ int intel_iommu_finish_prq(struct intel_iommu *iommu) iommu->iopf_queue = NULL; } - iommu_free_pages(iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq); iommu->prq = NULL; return 0; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index f5569347591f..f3da596410b5 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -110,6 +110,41 @@ static const struct mmu_notifier_ops intel_mmuops = { .free_notifier = intel_mm_free_notifier, }; +static int intel_iommu_sva_supported(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu; + + if (!info || dmar_disabled) + return -EINVAL; + + iommu = info->iommu; + if (!iommu) + return -EINVAL; + + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) + return -ENODEV; + + if (!info->pasid_enabled || !info->ats_enabled) + return -EINVAL; + + /* + * Devices having device-specific I/O fault handling should not + * support PCI/PRI. The IOMMU side has no means to check the + * capability of device-specific IOPF. Therefore, IOMMU can only + * default that if the device driver enables SVA on a non-PRI + * device, it will handle IOPF in its own way. + */ + if (!info->pri_supported) + return 0; + + /* Devices supporting PRI should have it enabled. */ + if (!info->pri_enabled) + return -EINVAL; + + return 0; +} + static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid, struct iommu_domain *old) @@ -121,22 +156,31 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, unsigned long sflags; int ret = 0; + ret = intel_iommu_sva_supported(dev); + if (ret) + return ret; + dev_pasid = domain_add_dev_pasid(domain, dev, pasid); if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); + ret = iopf_for_domain_replace(domain, old, dev); + if (ret) + goto out_remove_dev_pasid; + /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = __domain_setup_first_level(iommu, dev, pasid, FLPT_DEFAULT_DID, mm->pgd, sflags, old); if (ret) - goto out_remove_dev_pasid; + goto out_unwind_iopf; domain_remove_dev_pasid(old, dev, pasid); return 0; - +out_unwind_iopf: + iopf_for_domain_replace(old, domain, dev); out_remove_dev_pasid: domain_remove_dev_pasid(domain, dev, pasid); return ret; @@ -161,6 +205,10 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev, struct dmar_domain *domain; int ret; + ret = intel_iommu_sva_supported(dev); + if (ret) + return ERR_PTR(ret); + domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7632c80edea6..96425e92f313 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -13,6 +13,7 @@ #include <linux/bitops.h> #include <linux/io-pgtable.h> #include <linux/kernel.h> +#include <linux/device/faux.h> #include <linux/sizes.h> #include <linux/slab.h> #include <linux/types.h> @@ -251,8 +252,6 @@ static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg, (data->start_level == 1) && (oas == 40); } -static bool selftest_running = false; - static dma_addr_t __arm_lpae_dma_addr(void *pages) { return (dma_addr_t)virt_to_phys(pages); @@ -263,16 +262,20 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, void *cookie) { struct device *dev = cfg->iommu_dev; - int order = get_order(size); + size_t alloc_size; dma_addr_t dma; void *pages; - VM_BUG_ON((gfp & __GFP_HIGHMEM)); - + /* + * For very small starting-level translation tables the HW requires a + * minimum alignment of at least 64 to cover all cases. + */ + alloc_size = max(size, 64); if (cfg->alloc) - pages = cfg->alloc(cookie, size, gfp); + pages = cfg->alloc(cookie, alloc_size, gfp); else - pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order); + pages = iommu_alloc_pages_node_sz(dev_to_node(dev), gfp, + alloc_size); if (!pages) return NULL; @@ -300,7 +303,7 @@ out_free: if (cfg->free) cfg->free(cookie, pages, size); else - iommu_free_pages(pages, order); + iommu_free_pages(pages); return NULL; } @@ -316,7 +319,7 @@ static void __arm_lpae_free_pages(void *pages, size_t size, if (cfg->free) cfg->free(cookie, pages, size); else - iommu_free_pages(pages, get_order(size)); + iommu_free_pages(pages); } static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries, @@ -371,7 +374,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, for (i = 0; i < num_entries; i++) if (iopte_leaf(ptep[i], lvl, data->iop.fmt)) { /* We require an unmap first */ - WARN_ON(!selftest_running); + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); return -EEXIST; } else if (iopte_type(ptep[i]) == ARM_LPAE_PTE_TYPE_TABLE) { /* @@ -473,7 +476,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, cptep = iopte_deref(pte, data); } else if (pte) { /* We require an unmap first */ - WARN_ON(!selftest_running); + WARN_ON(!(cfg->quirks & IO_PGTABLE_QUIRK_NO_WARN)); return -EEXIST; } @@ -641,8 +644,10 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, unmap_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data); ptep += unmap_idx_start; pte = READ_ONCE(*ptep); - if (WARN_ON(!pte)) - return 0; + if (!pte) { + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); + return -ENOENT; + } /* If the size matches this level, we're in the right place */ if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) { @@ -652,8 +657,10 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, /* Find and handle non-leaf entries */ for (i = 0; i < num_entries; i++) { pte = READ_ONCE(ptep[i]); - if (WARN_ON(!pte)) + if (!pte) { + WARN_ON(!(data->iop.cfg.quirks & IO_PGTABLE_QUIRK_NO_WARN)); break; + } if (!iopte_leaf(pte, lvl, iop->fmt)) { __arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1); @@ -968,7 +975,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_ARM_TTBR1 | IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | - IO_PGTABLE_QUIRK_ARM_HD)) + IO_PGTABLE_QUIRK_ARM_HD | + IO_PGTABLE_QUIRK_NO_WARN)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1069,7 +1077,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) struct arm_lpae_io_pgtable *data; typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; - if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB)) + if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB | + IO_PGTABLE_QUIRK_NO_WARN)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -1310,7 +1319,6 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) #define __FAIL(ops, i) ({ \ WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \ arm_lpae_dump_ops(ops); \ - selftest_running = false; \ -EFAULT; \ }) @@ -1326,8 +1334,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) size_t size, mapped; struct io_pgtable_ops *ops; - selftest_running = true; - for (i = 0; i < ARRAY_SIZE(fmts); ++i) { cfg_cookie = cfg; ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg); @@ -1416,7 +1422,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) free_io_pgtable_ops(ops); } - selftest_running = false; return 0; } @@ -1433,15 +1438,18 @@ static int __init arm_lpae_do_selftests(void) }; int i, j, k, pass = 0, fail = 0; - struct device dev; + struct faux_device *dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .coherent_walk = true, - .iommu_dev = &dev, + .quirks = IO_PGTABLE_QUIRK_NO_WARN, }; - /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */ - set_dev_node(&dev, NUMA_NO_NODE); + dev = faux_device_create("io-pgtable-test", NULL, 0); + if (!dev) + return -ENOMEM; + + cfg.iommu_dev = &dev->dev; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { for (j = 0; j < ARRAY_SIZE(address_size); ++j) { @@ -1461,6 +1469,8 @@ static int __init arm_lpae_do_selftests(void) } pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); + faux_device_destroy(dev); + return fail ? -EFAULT : 0; } subsys_initcall(arm_lpae_do_selftests); diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index 06aca9ab52f9..679bda104797 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -107,14 +107,6 @@ static phys_addr_t iopte_to_paddr(dart_iopte pte, return paddr; } -static void *__dart_alloc_pages(size_t size, gfp_t gfp) -{ - int order = get_order(size); - - VM_BUG_ON((gfp & __GFP_HIGHMEM)); - return iommu_alloc_pages(gfp, order); -} - static int dart_init_pte(struct dart_io_pgtable *data, unsigned long iova, phys_addr_t paddr, dart_iopte prot, int num_entries, @@ -256,13 +248,13 @@ static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova, /* no L2 table present */ if (!pte) { - cptep = __dart_alloc_pages(tblsz, gfp); + cptep = iommu_alloc_pages_sz(gfp, tblsz); if (!cptep) return -ENOMEM; pte = dart_install_table(cptep, ptep, 0, data); if (pte) - iommu_free_pages(cptep, get_order(tblsz)); + iommu_free_pages(cptep); /* L2 table is present (now) */ pte = READ_ONCE(*ptep); @@ -413,7 +405,8 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits; for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) { - data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL); + data->pgd[i] = + iommu_alloc_pages_sz(GFP_KERNEL, DART_GRANULE(data)); if (!data->pgd[i]) goto out_free_data; cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(data->pgd[i]); @@ -423,8 +416,7 @@ apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) out_free_data: while (--i >= 0) { - iommu_free_pages(data->pgd[i], - get_order(DART_GRANULE(data))); + iommu_free_pages(data->pgd[i]); } kfree(data); return NULL; @@ -433,7 +425,6 @@ out_free_data: static void apple_dart_free_pgtable(struct io_pgtable *iop) { struct dart_io_pgtable *data = io_pgtable_to_data(iop); - int order = get_order(DART_GRANULE(data)); dart_iopte *ptep, *end; int i; @@ -445,9 +436,9 @@ static void apple_dart_free_pgtable(struct io_pgtable *iop) dart_iopte pte = *ptep++; if (pte) - iommu_free_pages(iopte_deref(pte, data), order); + iommu_free_pages(iopte_deref(pte, data)); } - iommu_free_pages(data->pgd[i], order); + iommu_free_pages(data->pgd[i]); } kfree(data); diff --git a/drivers/iommu/iommu-pages.c b/drivers/iommu/iommu-pages.c new file mode 100644 index 000000000000..238c09e5166b --- /dev/null +++ b/drivers/iommu/iommu-pages.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ +#include "iommu-pages.h" +#include <linux/gfp.h> +#include <linux/mm.h> + +#define IOPTDESC_MATCH(pg_elm, elm) \ + static_assert(offsetof(struct page, pg_elm) == \ + offsetof(struct ioptdesc, elm)) +IOPTDESC_MATCH(flags, __page_flags); +IOPTDESC_MATCH(lru, iopt_freelist_elm); /* Ensure bit 0 is clear */ +IOPTDESC_MATCH(mapping, __page_mapping); +IOPTDESC_MATCH(private, _private); +IOPTDESC_MATCH(page_type, __page_type); +IOPTDESC_MATCH(_refcount, __page_refcount); +#ifdef CONFIG_MEMCG +IOPTDESC_MATCH(memcg_data, memcg_data); +#endif +#undef IOPTDESC_MATCH +static_assert(sizeof(struct ioptdesc) <= sizeof(struct page)); + +/** + * iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from + * specific NUMA node + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @size: Memory size to allocate, rounded up to a power of 2 + * + * Returns the virtual address of the allocated page. The page must be freed + * either by calling iommu_free_pages() or via iommu_put_pages_list(). The + * returned allocation is round_up_pow_two(size) big, and is physically aligned + * to its size. + */ +void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size) +{ + unsigned long pgcnt; + struct folio *folio; + unsigned int order; + + /* This uses page_address() on the memory. */ + if (WARN_ON(gfp & __GFP_HIGHMEM)) + return NULL; + + /* + * Currently sub page allocations result in a full page being returned. + */ + order = get_order(size); + + /* + * __folio_alloc_node() does not handle NUMA_NO_NODE like + * alloc_pages_node() did. + */ + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); + + folio = __folio_alloc_node(gfp | __GFP_ZERO, order, nid); + if (unlikely(!folio)) + return NULL; + + /* + * All page allocations that should be reported to as "iommu-pagetables" + * to userspace must use one of the functions below. This includes + * allocations of page-tables and other per-iommu_domain configuration + * structures. + * + * This is necessary for the proper accounting as IOMMU state can be + * rather large, i.e. multiple gigabytes in size. + */ + pgcnt = 1UL << order; + mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, pgcnt); + lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, pgcnt); + + return folio_address(folio); +} +EXPORT_SYMBOL_GPL(iommu_alloc_pages_node_sz); + +static void __iommu_free_desc(struct ioptdesc *iopt) +{ + struct folio *folio = ioptdesc_folio(iopt); + const unsigned long pgcnt = 1UL << folio_order(folio); + + mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, -pgcnt); + lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, -pgcnt); + folio_put(folio); +} + +/** + * iommu_free_pages - free pages + * @virt: virtual address of the page to be freed. + * + * The page must have have been allocated by iommu_alloc_pages_node_sz() + */ +void iommu_free_pages(void *virt) +{ + if (!virt) + return; + __iommu_free_desc(virt_to_ioptdesc(virt)); +} +EXPORT_SYMBOL_GPL(iommu_free_pages); + +/** + * iommu_put_pages_list - free a list of pages. + * @list: The list of pages to be freed + * + * Frees a list of pages allocated by iommu_alloc_pages_node_sz(). On return the + * passed list is invalid, the caller must use IOMMU_PAGES_LIST_INIT to reinit + * the list if it expects to use it again. + */ +void iommu_put_pages_list(struct iommu_pages_list *list) +{ + struct ioptdesc *iopt, *tmp; + + list_for_each_entry_safe(iopt, tmp, &list->pages, iopt_freelist_elm) + __iommu_free_desc(iopt); +} +EXPORT_SYMBOL_GPL(iommu_put_pages_list); diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h index 82ebf0033081..b3af2813ed0c 100644 --- a/drivers/iommu/iommu-pages.h +++ b/drivers/iommu/iommu-pages.h @@ -7,180 +7,95 @@ #ifndef __IOMMU_PAGES_H #define __IOMMU_PAGES_H -#include <linux/vmstat.h> -#include <linux/gfp.h> -#include <linux/mm.h> - -/* - * All page allocations that should be reported to as "iommu-pagetables" to - * userspace must use one of the functions below. This includes allocations of - * page-tables and other per-iommu_domain configuration structures. - * - * This is necessary for the proper accounting as IOMMU state can be rather - * large, i.e. multiple gigabytes in size. - */ - -/** - * __iommu_alloc_account - account for newly allocated page. - * @page: head struct page of the page. - * @order: order of the page - */ -static inline void __iommu_alloc_account(struct page *page, int order) -{ - const long pgcnt = 1l << order; - - mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt); - mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, pgcnt); -} - -/** - * __iommu_free_account - account a page that is about to be freed. - * @page: head struct page of the page. - * @order: order of the page - */ -static inline void __iommu_free_account(struct page *page, int order) -{ - const long pgcnt = 1l << order; - - mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt); - mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, -pgcnt); -} +#include <linux/iommu.h> /** - * __iommu_alloc_pages - allocate a zeroed page of a given order. - * @gfp: buddy allocator flags - * @order: page order + * struct ioptdesc - Memory descriptor for IOMMU page tables + * @iopt_freelist_elm: List element for a struct iommu_pages_list * - * returns the head struct page of the allocated page. + * This struct overlays struct page for now. Do not modify without a good + * understanding of the issues. */ -static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) +struct ioptdesc { + unsigned long __page_flags; + + struct list_head iopt_freelist_elm; + unsigned long __page_mapping; + pgoff_t __index; + void *_private; + + unsigned int __page_type; + atomic_t __page_refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif +}; + +static inline struct ioptdesc *folio_ioptdesc(struct folio *folio) { - struct page *page; - - page = alloc_pages(gfp | __GFP_ZERO, order); - if (unlikely(!page)) - return NULL; - - __iommu_alloc_account(page, order); - - return page; + return (struct ioptdesc *)folio; } -/** - * __iommu_free_pages - free page of a given order - * @page: head struct page of the page - * @order: page order - */ -static inline void __iommu_free_pages(struct page *page, int order) +static inline struct folio *ioptdesc_folio(struct ioptdesc *iopt) { - if (!page) - return; - - __iommu_free_account(page, order); - __free_pages(page, order); + return (struct folio *)iopt; } -/** - * iommu_alloc_pages_node - allocate a zeroed page of a given order from - * specific NUMA node. - * @nid: memory NUMA node id - * @gfp: buddy allocator flags - * @order: page order - * - * returns the virtual address of the allocated page - */ -static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) +static inline struct ioptdesc *virt_to_ioptdesc(void *virt) { - struct page *page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); - - if (unlikely(!page)) - return NULL; - - __iommu_alloc_account(page, order); - - return page_address(page); + return folio_ioptdesc(virt_to_folio(virt)); } -/** - * iommu_alloc_pages - allocate a zeroed page of a given order - * @gfp: buddy allocator flags - * @order: page order - * - * returns the virtual address of the allocated page - */ -static inline void *iommu_alloc_pages(gfp_t gfp, int order) -{ - struct page *page = __iommu_alloc_pages(gfp, order); - - if (unlikely(!page)) - return NULL; - - return page_address(page); -} +void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size); +void iommu_free_pages(void *virt); +void iommu_put_pages_list(struct iommu_pages_list *list); /** - * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. - * @nid: memory NUMA node id - * @gfp: buddy allocator flags - * - * returns the virtual address of the allocated page + * iommu_pages_list_add - add the page to a iommu_pages_list + * @list: List to add the page to + * @virt: Address returned from iommu_alloc_pages_node_sz() */ -static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) +static inline void iommu_pages_list_add(struct iommu_pages_list *list, + void *virt) { - return iommu_alloc_pages_node(nid, gfp, 0); + list_add_tail(&virt_to_ioptdesc(virt)->iopt_freelist_elm, &list->pages); } /** - * iommu_alloc_page - allocate a zeroed page - * @gfp: buddy allocator flags + * iommu_pages_list_splice - Put all the pages in list from into list to + * @from: Source list of pages + * @to: Destination list of pages * - * returns the virtual address of the allocated page + * from must be re-initialized after calling this function if it is to be + * used again. */ -static inline void *iommu_alloc_page(gfp_t gfp) +static inline void iommu_pages_list_splice(struct iommu_pages_list *from, + struct iommu_pages_list *to) { - return iommu_alloc_pages(gfp, 0); + list_splice(&from->pages, &to->pages); } /** - * iommu_free_pages - free page of a given order - * @virt: virtual address of the page to be freed. - * @order: page order + * iommu_pages_list_empty - True if the list is empty + * @list: List to check */ -static inline void iommu_free_pages(void *virt, int order) +static inline bool iommu_pages_list_empty(struct iommu_pages_list *list) { - if (!virt) - return; - - __iommu_free_pages(virt_to_page(virt), order); + return list_empty(&list->pages); } /** - * iommu_free_page - free page - * @virt: virtual address of the page to be freed. - */ -static inline void iommu_free_page(void *virt) -{ - iommu_free_pages(virt, 0); -} - -/** - * iommu_put_pages_list - free a list of pages. - * @page: the head of the lru list to be freed. + * iommu_alloc_pages_sz - Allocate a zeroed page of a given size from + * specific NUMA node + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @size: Memory size to allocate, this is rounded up to a power of 2 * - * There are no locking requirement for these pages, as they are going to be - * put on a free list as soon as refcount reaches 0. Pages are put on this LRU - * list once they are removed from the IOMMU page tables. However, they can - * still be access through debugfs. + * Returns the virtual address of the allocated page. */ -static inline void iommu_put_pages_list(struct list_head *page) +static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size) { - while (!list_empty(page)) { - struct page *p = list_entry(page->prev, struct page, lru); - - list_del(&p->lru); - __iommu_free_account(p, 0); - put_page(p); - } + return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size); } #endif /* __IOMMU_PAGES_H */ diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index de5b54eaa8bf..e236b932e766 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -5,6 +5,7 @@ #define __LINUX_IOMMU_PRIV_H #include <linux/iommu.h> +#include <linux/msi.h> static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) { @@ -17,6 +18,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +void dev_iommu_free(struct device *dev); + const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) @@ -24,8 +27,7 @@ static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwsp return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL); } -int iommu_group_replace_domain(struct iommu_group *group, - struct iommu_domain *new_domain); +void iommu_fwspec_free(struct device *dev); int iommu_device_register_bus(struct iommu_device *iommu, const struct iommu_ops *ops, @@ -46,4 +48,19 @@ void iommu_detach_group_handle(struct iommu_domain *domain, int iommu_replace_group_handle(struct iommu_group *group, struct iommu_domain *new_domain, struct iommu_attach_handle *handle); + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */ +static inline int iommufd_sw_msi(struct iommu_domain *domain, + struct msi_desc *desc, phys_addr_t msi_addr) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */ + +int iommu_replace_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 503c5d23c1ea..1a51cfd82808 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -63,9 +63,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de * reference is taken. Caller must call iommu_sva_unbind_device() * to release each reference. * - * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to - * initialize the required SVA features. - * * On error, returns an ERR_PTR value. */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -299,17 +296,15 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; - if (ops->domain_alloc_sva) { - domain = ops->domain_alloc_sva(dev, mm); - if (IS_ERR(domain)) - return domain; - } else { - domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); - if (!domain) - return ERR_PTR(-ENOMEM); - } + if (!ops->domain_alloc_sva) + return ERR_PTR(-EOPNOTSUPP); + + domain = ops->domain_alloc_sva(dev, mm); + if (IS_ERR(domain)) + return domain; domain->type = IOMMU_DOMAIN_SVA; + domain->cookie_type = IOMMU_COOKIE_SVA; mmgrab(mm); domain->mm = mm; domain->owner = ops; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e3df1f06afbe..a4b606c591da 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -18,6 +18,7 @@ #include <linux/errno.h> #include <linux/host1x_context_bus.h> #include <linux/iommu.h> +#include <linux/iommufd.h> #include <linux/idr.h> #include <linux/err.h> #include <linux/pci.h> @@ -45,6 +46,9 @@ static unsigned int iommu_def_domain_type __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); static u32 iommu_cmd_line __read_mostly; +/* Tags used with xa_tag_pointer() in group->pasid_array */ +enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; + struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -273,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu, err = bus_iommu_probe(iommu_buses[i]); if (err) iommu_device_unregister(iommu); + else + WRITE_ONCE(iommu->ready, true); return err; } EXPORT_SYMBOL_GPL(iommu_device_register); @@ -352,7 +358,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) return param; } -static void dev_iommu_free(struct device *dev) +void dev_iommu_free(struct device *dev) { struct dev_iommu *param = dev->iommu; @@ -404,14 +410,42 @@ EXPORT_SYMBOL_GPL(dev_iommu_priv_set); * Init the dev->iommu and dev->iommu_group in the struct device and get the * driver probed */ -static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) +static int iommu_init_device(struct device *dev) { + const struct iommu_ops *ops; struct iommu_device *iommu_dev; struct iommu_group *group; int ret; if (!dev_iommu_get(dev)) return -ENOMEM; + /* + * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing + * is buried in the bus dma_configure path. Properly unpicking that is + * still a big job, so for now just invoke the whole thing. The device + * already having a driver bound means dma_configure has already run and + * found no IOMMU to wait for, so there's no point calling it again. + */ + if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) { + mutex_unlock(&iommu_probe_device_lock); + dev->bus->dma_configure(dev); + mutex_lock(&iommu_probe_device_lock); + /* If another instance finished the job for us, skip it */ + if (!dev->iommu || dev->iommu_group) + return -ENODEV; + } + /* + * At this point, relevant devices either now have a fwspec which will + * match ops registered with a non-NULL fwnode, or we can reasonably + * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can + * be present, and that any of their registered instances has suitable + * ops for probing, and thus cheekily co-opt the same mechanism. + */ + ops = iommu_fwspec_ops(dev->iommu->fwspec); + if (!ops) { + ret = -ENODEV; + goto err_free; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -508,29 +542,27 @@ static void iommu_deinit_device(struct device *dev) dev->iommu_group = NULL; module_put(ops->owner); dev_iommu_free(dev); +#ifdef CONFIG_IOMMU_DMA + dev->dma_iommu = false; +#endif +} + +static struct iommu_domain *pasid_array_entry_to_domain(void *entry) +{ + if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN) + return xa_untag_pointer(entry); + return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain; } DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { - const struct iommu_ops *ops; struct iommu_group *group; struct group_device *gdev; int ret; /* - * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU - * instances with non-NULL fwnodes, and client devices should have been - * identified with a fwspec by this point. Otherwise, we can currently - * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can - * be present, and that any of their registered instances has suitable - * ops for probing, and thus cheekily co-opt the same mechanism. - */ - ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); - if (!ops) - return -ENODEV; - /* * Serialise to avoid races between IOMMU drivers registering in * parallel and/or the "replay" calls from ACPI/OF code via client * driver probe. Once the latter have been cleaned up we should @@ -543,9 +575,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list if (dev->iommu_group) return 0; - ret = iommu_init_device(dev, ops); + ret = iommu_init_device(dev); if (ret) return ret; + /* + * And if we do now see any replay calls, they would indicate someone + * misusing the dma_configure path outside bus code. + */ + if (dev->driver) + dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); group = dev->iommu_group; gdev = iommu_group_alloc_device(group, dev); @@ -1595,15 +1633,13 @@ static struct iommu_domain *__iommu_alloc_identity_domain(struct device *dev) if (ops->identity_domain) return ops->identity_domain; - /* Older drivers create the identity domain via ops->domain_alloc() */ - if (!ops->domain_alloc) + if (ops->domain_alloc_identity) { + domain = ops->domain_alloc_identity(dev); + if (IS_ERR(domain)) + return domain; + } else { return ERR_PTR(-EOPNOTSUPP); - - domain = ops->domain_alloc(IOMMU_DOMAIN_IDENTITY); - if (IS_ERR(domain)) - return domain; - if (!domain) - return ERR_PTR(-ENOMEM); + } iommu_domain_init(domain, IOMMU_DOMAIN_IDENTITY, ops); return domain; @@ -1950,8 +1986,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token) { - BUG_ON(!domain); + if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE)) + return; + domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER; domain->handler = handler; domain->handler_token = token; } @@ -1989,8 +2027,10 @@ __iommu_paging_domain_alloc_flags(struct device *dev, unsigned int type, domain = ops->domain_alloc_paging(dev); else if (ops->domain_alloc_paging_flags) domain = ops->domain_alloc_paging_flags(dev, flags, NULL); +#if IS_ENABLED(CONFIG_FSL_PAMU) else if (ops->domain_alloc && !flags) domain = ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED); +#endif else return ERR_PTR(-EOPNOTSUPP); @@ -2021,9 +2061,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags); void iommu_domain_free(struct iommu_domain *domain) { - if (domain->type == IOMMU_DOMAIN_SVA) + switch (domain->cookie_type) { + case IOMMU_COOKIE_DMA_IOVA: + iommu_put_dma_cookie(domain); + break; + case IOMMU_COOKIE_DMA_MSI: + iommu_put_msi_cookie(domain); + break; + case IOMMU_COOKIE_SVA: mmdrop(domain->mm); - iommu_put_dma_cookie(domain); + break; + default: + break; + } if (domain->ops->free) domain->ops->free(domain); } @@ -2147,6 +2197,30 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev) return dev->iommu_group->default_domain; } +static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, + struct iommu_attach_handle *handle) +{ + if (handle) { + handle->domain = domain; + return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); + } + + return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); +} + +static bool domain_iommu_ops_compatible(const struct iommu_ops *ops, + struct iommu_domain *domain) +{ + if (domain->owner == ops) + return true; + + /* For static domains, owner isn't set. */ + if (domain == ops->blocked_domain || domain == ops->identity_domain) + return true; + + return false; +} + static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { @@ -2157,7 +2231,8 @@ static int __iommu_attach_group(struct iommu_domain *domain, return -EBUSY; dev = iommu_group_first_dev(group); - if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner) + if (!dev_has_iommu(dev) || + !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain)) return -EINVAL; return __iommu_group_set_domain(group, domain); @@ -2187,32 +2262,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); -/** - * iommu_group_replace_domain - replace the domain that a group is attached to - * @group: IOMMU group that will be attached to the new domain - * @new_domain: new IOMMU domain to replace with - * - * This API allows the group to switch domains without being forced to go to - * the blocking domain in-between. - * - * If the currently attached domain is a core domain (e.g. a default_domain), - * it will act just like the iommu_attach_group(). - */ -int iommu_group_replace_domain(struct iommu_group *group, - struct iommu_domain *new_domain) -{ - int ret; - - if (!new_domain) - return -EINVAL; - - mutex_lock(&group->mutex); - ret = __iommu_group_set_domain(group, new_domain); - mutex_unlock(&group->mutex); - return ret; -} -EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, "IOMMUFD_INTERNAL"); - static int __iommu_device_set_domain(struct iommu_group *group, struct device *dev, struct iommu_domain *new_domain, @@ -2364,6 +2413,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; size_t offset, pgsize, pgsize_next; + size_t offset_end; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ @@ -2404,7 +2454,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, * If size is big enough to accommodate the larger page, reduce * the number of smaller pages. */ - if (offset + pgsize_next <= size) + if (!check_add_overflow(offset, pgsize_next, &offset_end) && + offset_end <= size) size = offset; out_set_count: @@ -2412,8 +2463,8 @@ out_set_count: return pgsize; } -static int __iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; unsigned long orig_iova = iova; @@ -2422,12 +2473,19 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t orig_paddr = paddr; int ret = 0; + might_sleep_if(gfpflags_allow_blocking(gfp)); + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) return -ENODEV; + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + /* find out the minimum page size supported */ min_pagesz = 1 << __ffs(domain->pgsize_bitmap); @@ -2475,31 +2533,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { const struct iommu_domain_ops *ops = domain->ops; - int ret; - might_sleep_if(gfpflags_allow_blocking(gfp)); - - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; + if (!ops->iotlb_sync_map) + return 0; + return ops->iotlb_sync_map(domain, iova, size); +} - ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && ops->iotlb_sync_map) { - ret = ops->iotlb_sync_map(domain, iova, size); - if (ret) - goto out_err; - } +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + int ret; - return ret; + ret = iommu_map_nosync(domain, iova, paddr, size, prot, gfp); + if (ret) + return ret; -out_err: - /* undo mappings already done */ - iommu_unmap(domain, iova, size); + ret = iommu_sync_map(domain, iova, size); + if (ret) + iommu_unmap(domain, iova, size); return ret; } @@ -2587,6 +2641,25 @@ size_t iommu_unmap(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap); +/** + * iommu_unmap_fast() - Remove mappings from a range of IOVA without IOTLB sync + * @domain: Domain to manipulate + * @iova: IO virtual address to start + * @size: Length of the range starting from @iova + * @iotlb_gather: range information for a pending IOTLB flush + * + * iommu_unmap_fast() will remove a translation created by iommu_map(). + * It can't subdivide a mapping created by iommu_map(), so it should be + * called with IOVA ranges that match what was passed to iommu_map(). The + * range can aggregate contiguous iommu_map() calls so long as no individual + * range is split. + * + * Basically iommu_unmap_fast() is the same as iommu_unmap() but for callers + * which manage the IOTLB flushing externally to perform a batched sync. + * + * Returns: Number of bytes of IOVA unmapped. iova + res will be the point + * unmapping stopped. + */ size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2599,26 +2672,17 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { - const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; int ret; - might_sleep_if(gfpflags_allow_blocking(gfp)); - - /* Discourage passing strange GFP flags */ - if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | - __GFP_HIGHMEM))) - return -EINVAL; - while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); if (len && s_phys != start + len) { - ret = __iommu_map(domain, iova + mapped, start, + ret = iommu_map_nosync(domain, iova + mapped, start, len, prot, gfp); - if (ret) goto out_err; @@ -2641,11 +2705,10 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) { - ret = ops->iotlb_sync_map(domain, iova, mapped); - if (ret) - goto out_err; - } + ret = iommu_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + return mapped; out_err: @@ -2689,7 +2752,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev, * if upper layers showed interest and installed a fault handler, * invoke it. */ - if (domain->handler) + if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && + domain->handler) ret = domain->handler(domain, dev, iova, flags, domain->handler_token); @@ -2798,31 +2862,39 @@ bool iommu_default_passthrough(void) } EXPORT_SYMBOL_GPL(iommu_default_passthrough); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) { - const struct iommu_ops *ops = NULL; - struct iommu_device *iommu; + const struct iommu_device *iommu, *ret = NULL; spin_lock(&iommu_device_lock); list_for_each_entry(iommu, &iommu_device_list, list) if (iommu->fwnode == fwnode) { - ops = iommu->ops; + ret = iommu; break; } spin_unlock(&iommu_device_lock); - return ops; + return ret; +} + +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +{ + const struct iommu_device *iommu = iommu_from_fwnode(fwnode); + + return iommu ? iommu->ops : NULL; } int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { - const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); + const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!ops) + if (!iommu) return driver_deferred_probe_check_state(dev); + if (!dev->iommu && !READ_ONCE(iommu->ready)) + return -EPROBE_DEFER; if (fwspec) - return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; + return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; @@ -2849,7 +2921,6 @@ void iommu_fwspec_free(struct device *dev) dev_iommu_fwspec_set(dev, NULL); } } -EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) { @@ -2877,38 +2948,6 @@ int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) } EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids); -/* - * Per device IOMMU features. - */ -int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) -{ - if (dev_has_iommu(dev)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->dev_enable_feat) - return ops->dev_enable_feat(dev, feat); - } - - return -ENODEV; -} -EXPORT_SYMBOL_GPL(iommu_dev_enable_feature); - -/* - * The device drivers should do the necessary cleanups before calling this. - */ -int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) -{ - if (dev_has_iommu(dev)) { - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->dev_disable_feat) - return ops->dev_disable_feat(dev, feat); - } - - return -EBUSY; -} -EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); - /** * iommu_setup_default_domain - Set the default_domain for the group * @group: Group to change @@ -3328,16 +3367,19 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, } static int __iommu_set_group_pasid(struct iommu_domain *domain, - struct iommu_group *group, ioasid_t pasid) + struct iommu_group *group, ioasid_t pasid, + struct iommu_domain *old) { struct group_device *device, *last_gdev; int ret; for_each_group_device(group, device) { - ret = domain->ops->set_dev_pasid(domain, device->dev, - pasid, NULL); - if (ret) - goto err_revert; + if (device->dev->iommu->max_pasids > 0) { + ret = domain->ops->set_dev_pasid(domain, device->dev, + pasid, old); + if (ret) + goto err_revert; + } } return 0; @@ -3347,7 +3389,18 @@ err_revert: for_each_group_device(group, device) { if (device == last_gdev) break; - iommu_remove_dev_pasid(device->dev, pasid, domain); + if (device->dev->iommu->max_pasids > 0) { + /* + * If no old domain, undo the succeeded devices/pasid. + * Otherwise, rollback the succeeded devices/pasid to + * the old domain. And it is a driver bug to fail + * attaching with a previously good domain. + */ + if (!old || + WARN_ON(old->ops->set_dev_pasid(old, device->dev, + pasid, domain))) + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } return ret; } @@ -3358,8 +3411,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, { struct group_device *device; - for_each_group_device(group, device) - iommu_remove_dev_pasid(device->dev, pasid, domain); + for_each_group_device(group, device) { + if (device->dev->iommu->max_pasids > 0) + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } /* @@ -3369,6 +3424,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, * @pasid: the pasid of the device. * @handle: the attach handle. * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle if it intends to pass a valid handle. + * * Return: 0 on success, or an error. */ int iommu_attach_device_pasid(struct iommu_domain *domain, @@ -3379,6 +3437,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, struct iommu_group *group = dev->iommu_group; struct group_device *device; const struct iommu_ops *ops; + void *entry; int ret; if (!group) @@ -3391,33 +3450,146 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, !ops->blocked_domain->ops->set_dev_pasid) return -EOPNOTSUPP; - if (ops != domain->owner || pasid == IOMMU_NO_PASID) + if (!domain_iommu_ops_compatible(ops, domain) || + pasid == IOMMU_NO_PASID) return -EINVAL; mutex_lock(&group->mutex); for_each_group_device(group, device) { - if (pasid >= device->dev->iommu->max_pasids) { + /* + * Skip PASID validation for devices without PASID support + * (max_pasids = 0). These devices cannot issue transactions + * with PASID, so they don't affect group's PASID usage. + */ + if ((device->dev->iommu->max_pasids > 0) && + (pasid >= device->dev->iommu->max_pasids)) { ret = -EINVAL; goto out_unlock; } } - if (handle) - handle->domain = domain; + entry = iommu_make_pasid_array_entry(domain, handle); - ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL); + /* + * Entry present is a failure case. Use xa_insert() instead of + * xa_reserve(). + */ + ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); if (ret) goto out_unlock; - ret = __iommu_set_group_pasid(domain, group, pasid); - if (ret) - xa_erase(&group->pasid_array, pasid); + ret = __iommu_set_group_pasid(domain, group, pasid, NULL); + if (ret) { + xa_release(&group->pasid_array, pasid); + goto out_unlock; + } + + /* + * The xa_insert() above reserved the memory, and the group->mutex is + * held, this cannot fail. The new domain cannot be visible until the + * operation succeeds as we cannot tolerate PRIs becoming concurrently + * queued and then failing attach. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + pasid, entry, GFP_KERNEL))); + out_unlock: mutex_unlock(&group->mutex); return ret; } EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); +/** + * iommu_replace_device_pasid - Replace the domain that a specific pasid + * of the device is attached to + * @domain: the new iommu domain + * @dev: the attached device. + * @pasid: the pasid of the device. + * @handle: the attach handle. + * + * This API allows the pasid to switch domains. The @pasid should have been + * attached. Otherwise, this fails. The pasid will keep the old configuration + * if replacement failed. + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle if it intends to pass a valid handle. + * + * Return 0 on success, or an error. + */ +int iommu_replace_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) +{ + /* Caller must be a probed driver on dev */ + struct iommu_group *group = dev->iommu_group; + struct iommu_attach_handle *entry; + struct iommu_domain *curr_domain; + void *curr; + int ret; + + if (!group) + return -ENODEV; + + if (!domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) || + pasid == IOMMU_NO_PASID || !handle) + return -EINVAL; + + mutex_lock(&group->mutex); + entry = iommu_make_pasid_array_entry(domain, handle); + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(curr)) { + ret = xa_err(curr); + goto out_unlock; + } + + /* + * No domain (with or without handle) attached, hence not + * a replace case. + */ + if (!curr) { + xa_release(&group->pasid_array, pasid); + ret = -EINVAL; + goto out_unlock; + } + + /* + * Reusing handle is problematic as there are paths that refers + * the handle without lock. To avoid race, reject the callers that + * attempt it. + */ + if (curr == entry) { + WARN_ON(1); + ret = -EINVAL; + goto out_unlock; + } + + curr_domain = pasid_array_entry_to_domain(curr); + ret = 0; + + if (curr_domain != domain) { + ret = __iommu_set_group_pasid(domain, group, + pasid, curr_domain); + if (ret) + goto out_unlock; + } + + /* + * The above xa_cmpxchg() reserved the memory, and the + * group->mutex is held, this cannot fail. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + pasid, entry, GFP_KERNEL))); + +out_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL"); + /* * iommu_detach_device_pasid() - Detach the domain from pasid of device * @domain: the iommu domain. @@ -3485,13 +3657,17 @@ struct iommu_attach_handle * iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) { struct iommu_attach_handle *handle; + void *entry; xa_lock(&group->pasid_array); - handle = xa_load(&group->pasid_array, pasid); - if (!handle) + entry = xa_load(&group->pasid_array, pasid); + if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { handle = ERR_PTR(-ENOENT); - else if (type && handle->domain->type != type) - handle = ERR_PTR(-EBUSY); + } else { + handle = xa_untag_pointer(entry); + if (type && handle->domain->type != type) + handle = ERR_PTR(-EBUSY); + } xa_unlock(&group->pasid_array); return handle; @@ -3509,30 +3685,43 @@ EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL"); * This is a variant of iommu_attach_group(). It allows the caller to provide * an attach handle and use it when the domain is attached. This is currently * used by IOMMUFD to deliver the I/O page faults. + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle. */ int iommu_attach_group_handle(struct iommu_domain *domain, struct iommu_group *group, struct iommu_attach_handle *handle) { + void *entry; int ret; - if (handle) - handle->domain = domain; + if (!handle) + return -EINVAL; mutex_lock(&group->mutex); - ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + entry = iommu_make_pasid_array_entry(domain, handle); + ret = xa_insert(&group->pasid_array, + IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); if (ret) - goto err_unlock; + goto out_unlock; ret = __iommu_attach_group(domain, group); - if (ret) - goto err_erase; - mutex_unlock(&group->mutex); + if (ret) { + xa_release(&group->pasid_array, IOMMU_NO_PASID); + goto out_unlock; + } - return 0; -err_erase: - xa_erase(&group->pasid_array, IOMMU_NO_PASID); -err_unlock: + /* + * The xa_insert() above reserved the memory, and the group->mutex is + * held, this cannot fail. The new domain cannot be visible until the + * operation succeeds as we cannot tolerate PRIs becoming concurrently + * queued and then failing attach. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + IOMMU_NO_PASID, entry, GFP_KERNEL))); + +out_unlock: mutex_unlock(&group->mutex); return ret; } @@ -3562,33 +3751,37 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); * @new_domain: new IOMMU domain to replace with * @handle: attach handle * - * This is a variant of iommu_group_replace_domain(). It allows the caller to - * provide an attach handle for the new domain and use it when the domain is - * attached. + * This API allows the group to switch domains without being forced to go to + * the blocking domain in-between. It allows the caller to provide an attach + * handle for the new domain and use it when the domain is attached. + * + * If the currently attached domain is a core domain (e.g. a default_domain), + * it will act just like the iommu_attach_group_handle(). + * + * Caller should always provide a new handle to avoid race with the paths + * that have lockless reference to handle. */ int iommu_replace_group_handle(struct iommu_group *group, struct iommu_domain *new_domain, struct iommu_attach_handle *handle) { - void *curr; + void *curr, *entry; int ret; - if (!new_domain) + if (!new_domain || !handle) return -EINVAL; mutex_lock(&group->mutex); - if (handle) { - ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); - if (ret) - goto err_unlock; - handle->domain = new_domain; - } + entry = iommu_make_pasid_array_entry(new_domain, handle); + ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); + if (ret) + goto err_unlock; ret = __iommu_group_set_domain(group, new_domain); if (ret) goto err_release; - curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); WARN_ON(xa_is_err(curr)); mutex_unlock(&group->mutex); @@ -3601,3 +3794,45 @@ err_unlock: return ret; } EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); + +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) +/** + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain + * @desc: MSI descriptor, will store the MSI page + * @msi_addr: MSI target address to be mapped + * + * The implementation of sw_msi() should take msi_addr and map it to + * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the + * mapping information. + * + * Return: 0 on success or negative error code if the mapping failed. + */ +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommu_group *group = dev->iommu_group; + int ret = 0; + + if (!group) + return 0; + + mutex_lock(&group->mutex); + /* An IDENTITY domain must pass through */ + if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) { + switch (group->domain->cookie_type) { + case IOMMU_COOKIE_DMA_MSI: + case IOMMU_COOKIE_DMA_IOVA: + ret = iommu_dma_sw_msi(group->domain, desc, msi_addr); + break; + case IOMMU_COOKIE_IOMMUFD: + ret = iommufd_sw_msi(group->domain, desc, msi_addr); + break; + default: + ret = -EOPNOTSUPP; + break; + } + } + mutex_unlock(&group->mutex); + return ret; +} +#endif /* CONFIG_IRQ_MSI_IOMMU */ diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig index 0a07f9449fd9..2beeb4f60ee5 100644 --- a/drivers/iommu/iommufd/Kconfig +++ b/drivers/iommu/iommufd/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config IOMMUFD_DRIVER_CORE - tristate + bool default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n config IOMMUFD diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index cb784da6cddc..71d692c9a8f4 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ - fault.o \ + eventq.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 3c7800d4ab62..86244403b532 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -3,6 +3,7 @@ */ #include <linux/iommu.h> #include <linux/iommufd.h> +#include <linux/pci-ats.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h> @@ -17,12 +18,17 @@ MODULE_PARM_DESC( "Allow IOMMUFD to bind to devices even if the platform cannot isolate " "the MSI interrupt window. Enabling this is a security weakness."); +struct iommufd_attach { + struct iommufd_hw_pagetable *hwpt; + struct xarray device_array; +}; + static void iommufd_group_release(struct kref *kref) { struct iommufd_group *igroup = container_of(kref, struct iommufd_group, ref); - WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list)); + WARN_ON(!xa_empty(&igroup->pasid_attach)); xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup, NULL, GFP_KERNEL); @@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, kref_init(&new_igroup->ref); mutex_init(&new_igroup->lock); - INIT_LIST_HEAD(&new_igroup->device_list); + xa_init(&new_igroup->pasid_attach); new_igroup->sw_msi_start = PHYS_ADDR_MAX; /* group reference moves into new_igroup */ new_igroup->group = group; @@ -215,7 +221,6 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; - mutex_init(&idev->iopf_lock); /* * If the caller fails after this success it must call @@ -293,56 +298,83 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); +static unsigned int iommufd_group_device_num(struct iommufd_group *igroup, + ioasid_t pasid) +{ + struct iommufd_attach *attach; + struct iommufd_device *idev; + unsigned int count = 0; + unsigned long index; + + lockdep_assert_held(&igroup->lock); + + attach = xa_load(&igroup->pasid_attach, pasid); + if (attach) + xa_for_each(&attach->device_array, index, idev) + count++; + return count; +} + +#ifdef CONFIG_IRQ_MSI_IOMMU static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { - phys_addr_t sw_msi_start = igroup->sw_msi_start; - int rc; + struct iommufd_ctx *ictx = igroup->ictx; + struct iommufd_sw_msi_map *cur; + + if (igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; /* - * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to - * call iommu_get_msi_cookie() on its behalf. This is necessary to setup - * the MSI window so iommu_dma_prepare_msi() can install pages into our - * domain after request_irq(). If it is not done interrupts will not - * work on this domain. - * - * FIXME: This is conceptually broken for iommufd since we want to allow - * userspace to change the domains, eg switch from an identity IOAS to a - * DMA IOAS. There is currently no way to create a MSI window that - * matches what the IRQ layer actually expects in a newly created - * domain. + * Install all the MSI pages the device has been using into the domain */ - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { - rc = iommu_get_msi_cookie(hwpt_paging->common.domain, - sw_msi_start); + guard(mutex)(&ictx->sw_msi_lock); + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + int rc; + + if (cur->sw_msi_start != igroup->sw_msi_start || + !test_bit(cur->id, igroup->required_sw_msi.bitmap)) + continue; + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur); if (rc) return rc; - - /* - * iommu_get_msi_cookie() can only be called once per domain, - * it returns -EBUSY on later calls. - */ - hwpt_paging->msi_cookie = true; } return 0; } +#else +static inline int +iommufd_group_setup_msi(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + return 0; +} +#endif + +static bool +iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid) +{ + lockdep_assert_held(&igroup->lock); + return !xa_load(&igroup->pasid_attach, pasid); +} static int iommufd_device_attach_reserved_iova(struct iommufd_device *idev, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_group *igroup = idev->igroup; int rc; - lockdep_assert_held(&idev->igroup->lock); + lockdep_assert_held(&igroup->lock); rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, idev->dev, - &idev->igroup->sw_msi_start); + &igroup->sw_msi_start); if (rc) return rc; - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); + if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) { + rc = iommufd_group_setup_msi(igroup, hwpt_paging); if (rc) { iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); @@ -354,137 +386,215 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, /* The device attach/detach/replace helpers for attach_handle */ -/* Check if idev is attached to igroup->hwpt */ -static bool iommufd_device_is_attached(struct iommufd_device *idev) +static bool iommufd_device_is_attached(struct iommufd_device *idev, + ioasid_t pasid) { - struct iommufd_device *cur; + struct iommufd_attach *attach; - list_for_each_entry(cur, &idev->igroup->device_list, group_item) - if (cur == idev) - return true; - return false; + attach = xa_load(&idev->igroup->pasid_attach, pasid); + return xa_load(&attach->device_array, idev->obj.id); +} + +static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_group *igroup = idev->igroup; + + lockdep_assert_held(&igroup->lock); + + if (pasid == IOMMU_NO_PASID) { + unsigned long start = IOMMU_NO_PASID; + + if (!hwpt->pasid_compat && + xa_find_after(&igroup->pasid_attach, + &start, UINT_MAX, XA_PRESENT)) + return -EINVAL; + } else { + struct iommufd_attach *attach; + + if (!hwpt->pasid_compat) + return -EINVAL; + + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + if (attach && attach->hwpt && !attach->hwpt->pasid_compat) + return -EINVAL; + } + + return 0; +} + +static bool iommufd_hwpt_compatible_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct pci_dev *pdev; + + if (!hwpt->fault || !dev_is_pci(idev->dev)) + return true; + + /* + * Once we turn on PCI/PRI support for VF, the response failure code + * should not be forwarded to the hardware due to PRI being a shared + * resource between PF and VFs. There is no coordination for this + * shared capability. This waits for a vPRI reset to recover. + */ + pdev = to_pci_dev(idev->dev); + + return (!pdev->is_virtfn || !pci_pri_supported(pdev)); } static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, + ioasid_t pasid) { struct iommufd_attach_handle *handle; int rc; - lockdep_assert_held(&idev->igroup->lock); + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return -ENOMEM; - if (hwpt->fault) { - rc = iommufd_fault_iopf_enable(idev); - if (rc) - goto out_free_handle; - } - handle->idev = idev; - rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + else + rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid, + &handle->handle); if (rc) - goto out_disable_iopf; + goto out_free_handle; return 0; -out_disable_iopf: - if (hwpt->fault) - iommufd_fault_iopf_disable(idev); out_free_handle: kfree(handle); return rc; } static struct iommufd_attach_handle * -iommufd_device_get_attach_handle(struct iommufd_device *idev) +iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid) { struct iommu_attach_handle *handle; lockdep_assert_held(&idev->igroup->lock); handle = - iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + iommu_attach_handle_get(idev->igroup->group, pasid, 0); if (IS_ERR(handle)) return NULL; return to_iommufd_handle(handle); } static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, + ioasid_t pasid) { struct iommufd_attach_handle *handle; - handle = iommufd_device_get_attach_handle(idev); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); - if (hwpt->fault) { - iommufd_auto_response_faults(hwpt, handle); - iommufd_fault_iopf_disable(idev); - } + handle = iommufd_device_get_attach_handle(idev, pasid); + if (pasid == IOMMU_NO_PASID) + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + else + iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid); + + iommufd_auto_response_faults(hwpt, handle); kfree(handle); } static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + ioasid_t pasid, struct iommufd_hw_pagetable *hwpt, struct iommufd_hw_pagetable *old) { - struct iommufd_attach_handle *handle, *old_handle = - iommufd_device_get_attach_handle(idev); + struct iommufd_attach_handle *handle, *old_handle; int rc; + if (!iommufd_hwpt_compatible_device(hwpt, idev)) + return -EINVAL; + + rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid); + if (rc) + return rc; + + old_handle = iommufd_device_get_attach_handle(idev, pasid); + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return -ENOMEM; - if (hwpt->fault && !old->fault) { - rc = iommufd_fault_iopf_enable(idev); - if (rc) - goto out_free_handle; - } - handle->idev = idev; - rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, - &handle->handle); + if (pasid == IOMMU_NO_PASID) + rc = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + else + rc = iommu_replace_device_pasid(hwpt->domain, idev->dev, + pasid, &handle->handle); if (rc) - goto out_disable_iopf; + goto out_free_handle; - if (old->fault) { - iommufd_auto_response_faults(hwpt, old_handle); - if (!hwpt->fault) - iommufd_fault_iopf_disable(idev); - } + iommufd_auto_response_faults(hwpt, old_handle); kfree(old_handle); return 0; -out_disable_iopf: - if (hwpt->fault && !old->fault) - iommufd_fault_iopf_disable(idev); out_free_handle: kfree(handle); return rc; } int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) + struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) { - rc = -EINVAL; + attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL, + XA_ZERO_ENTRY, GFP_KERNEL); + if (xa_is_err(attach)) { + rc = xa_err(attach); goto err_unlock; } - if (hwpt_paging) { + if (!attach) { + attach = kzalloc(sizeof(*attach), GFP_KERNEL); + if (!attach) { + rc = -ENOMEM; + goto err_release_pasid; + } + xa_init(&attach->device_array); + } + + old_hwpt = attach->hwpt; + + rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY, + GFP_KERNEL); + if (rc) { + WARN_ON(rc == -EBUSY && !old_hwpt); + goto err_free_attach; + } + + if (old_hwpt && old_hwpt != hwpt) { + rc = -EINVAL; + goto err_release_devid; + } + + if (attach_resv) { rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging); if (rc) - goto err_unlock; + goto err_release_devid; } /* @@ -494,51 +604,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * reserved regions are only updated during individual device * attachment. */ - if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_hwpt_attach_device(hwpt, idev); + if (iommufd_group_first_attach(igroup, pasid)) { + rc = iommufd_hwpt_attach_device(hwpt, idev, pasid); if (rc) goto err_unresv; - idev->igroup->hwpt = hwpt; + attach->hwpt = hwpt; + WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach, + GFP_KERNEL))); } refcount_inc(&hwpt->obj.users); - list_add_tail(&idev->group_item, &idev->igroup->device_list); - mutex_unlock(&idev->igroup->lock); + WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id, + idev, GFP_KERNEL))); + mutex_unlock(&igroup->lock); return 0; err_unresv: - if (hwpt_paging) + if (attach_resv) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); +err_release_devid: + xa_release(&attach->device_array, idev->obj.id); +err_free_attach: + if (iommufd_group_first_attach(igroup, pasid)) + kfree(attach); +err_release_pasid: + if (iommufd_group_first_attach(igroup, pasid)) + xa_release(&igroup->pasid_attach, pasid); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return rc; } struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev) +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid) { - struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt; - struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + struct iommufd_group *igroup = idev->igroup; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_attach *attach; + + mutex_lock(&igroup->lock); + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { + mutex_unlock(&igroup->lock); + return NULL; + } + + hwpt = attach->hwpt; + hwpt_paging = find_hwpt_paging(hwpt); - mutex_lock(&idev->igroup->lock); - list_del(&idev->group_item); - if (list_empty(&idev->igroup->device_list)) { - iommufd_hwpt_detach_device(hwpt, idev); - idev->igroup->hwpt = NULL; + xa_erase(&attach->device_array, idev->obj.id); + if (xa_empty(&attach->device_array)) { + iommufd_hwpt_detach_device(hwpt, idev, pasid); + xa_erase(&igroup->pasid_attach, pasid); + kfree(attach); } - if (hwpt_paging) + if (hwpt_paging && pasid == IOMMU_NO_PASID) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy hwpt */ return hwpt; } static struct iommufd_hw_pagetable * -iommufd_device_do_attach(struct iommufd_device *idev, +iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { int rc; - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) return ERR_PTR(rc); return NULL; @@ -548,11 +681,14 @@ static void iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; lockdep_assert_held(&igroup->lock); - list_for_each_entry(cur, &igroup->device_list, group_item) + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + xa_for_each(&attach->device_array, index, cur) iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); } @@ -561,14 +697,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { struct iommufd_hwpt_paging *old_hwpt_paging; + struct iommufd_attach *attach; struct iommufd_device *cur; + unsigned long index; int rc; lockdep_assert_held(&igroup->lock); - old_hwpt_paging = find_hwpt_paging(igroup->hwpt); + attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID); + old_hwpt_paging = find_hwpt_paging(attach->hwpt); if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) { + xa_for_each(&attach->device_array, index, cur) { rc = iopt_table_enforce_dev_resv_regions( &hwpt_paging->ioas->iopt, cur->dev, NULL); if (rc) @@ -587,74 +726,81 @@ err_unresv: } static struct iommufd_hw_pagetable * -iommufd_device_do_replace(struct iommufd_device *idev, +iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_hw_pagetable *hwpt) { struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt); + bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID; struct iommufd_hwpt_paging *old_hwpt_paging; struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; + struct iommufd_attach *attach; unsigned int num_devices; int rc; - mutex_lock(&idev->igroup->lock); + mutex_lock(&igroup->lock); - if (igroup->hwpt == NULL) { + attach = xa_load(&igroup->pasid_attach, pasid); + if (!attach) { rc = -EINVAL; goto err_unlock; } - if (!iommufd_device_is_attached(idev)) { + old_hwpt = attach->hwpt; + + WARN_ON(!old_hwpt || xa_empty(&attach->device_array)); + + if (!iommufd_device_is_attached(idev, pasid)) { rc = -EINVAL; goto err_unlock; } - if (hwpt == igroup->hwpt) { - mutex_unlock(&idev->igroup->lock); + if (hwpt == old_hwpt) { + mutex_unlock(&igroup->lock); return NULL; } - old_hwpt = igroup->hwpt; - if (hwpt_paging) { + if (attach_resv) { rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging); if (rc) goto err_unlock; } - rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt); + rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt); if (rc) goto err_unresv; old_hwpt_paging = find_hwpt_paging(old_hwpt); - if (old_hwpt_paging && + if (old_hwpt_paging && pasid == IOMMU_NO_PASID && (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas)) iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging); - igroup->hwpt = hwpt; + attach->hwpt = hwpt; - num_devices = list_count_nodes(&igroup->device_list); + num_devices = iommufd_group_device_num(igroup, pasid); /* - * Move the refcounts held by the device_list to the new hwpt. Retain a + * Move the refcounts held by the device_array to the new hwpt. Retain a * refcount for this thread as the caller will free it. */ refcount_add(num_devices, &hwpt->obj.users); if (num_devices > 1) WARN_ON(refcount_sub_and_test(num_devices - 1, &old_hwpt->obj.users)); - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - if (hwpt_paging) + if (attach_resv) iommufd_group_remove_reserved_iova(igroup, hwpt_paging); err_unlock: - mutex_unlock(&idev->igroup->lock); + mutex_unlock(&igroup->lock); return ERR_PTR(rc); } typedef struct iommufd_hw_pagetable *(*attach_fn)( - struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); + struct iommufd_device *idev, ioasid_t pasid, + struct iommufd_hw_pagetable *hwpt); /* * When automatically managing the domains we search for a compatible domain in @@ -662,7 +808,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)( * Automatic domain selection will never pick a manually created domain. */ static struct iommufd_hw_pagetable * -iommufd_device_auto_get_domain(struct iommufd_device *idev, +iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid, struct iommufd_ioas *ioas, u32 *pt_id, attach_fn do_attach) { @@ -691,7 +837,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) { iommufd_put_object(idev->ictx, &hwpt->obj); /* @@ -709,8 +855,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } - hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, - immediate_attach, NULL); + hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid, + 0, immediate_attach, NULL); if (IS_ERR(hwpt_paging)) { destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; @@ -718,7 +864,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, hwpt = &hwpt_paging->common; if (!immediate_attach) { - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_abort; } else { @@ -739,8 +885,9 @@ out_unlock: return destroy_hwpt; } -static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, - attach_fn do_attach) +static int iommufd_device_change_pt(struct iommufd_device *idev, + ioasid_t pasid, + u32 *pt_id, attach_fn do_attach) { struct iommufd_hw_pagetable *destroy_hwpt; struct iommufd_object *pt_obj; @@ -755,7 +902,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); - destroy_hwpt = (*do_attach)(idev, hwpt); + destroy_hwpt = (*do_attach)(idev, pasid, hwpt); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -764,8 +911,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, struct iommufd_ioas *ioas = container_of(pt_obj, struct iommufd_ioas, obj); - destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id, - do_attach); + destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas, + pt_id, do_attach); if (IS_ERR(destroy_hwpt)) goto out_put_pt_obj; break; @@ -787,22 +934,26 @@ out_put_pt_obj: } /** - * iommufd_device_attach - Connect a device to an iommu_domain + * iommufd_device_attach - Connect a device/pasid to an iommu_domain * @idev: device to attach + * @pasid: pasid to attach * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * - * This connects the device to an iommu_domain, either automatically or manually - * selected. Once this completes the device could do DMA. + * This connects the device/pasid to an iommu_domain, either automatically + * or manually selected. Once this completes the device could do DMA with + * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage. * * The caller should return the resulting pt_id back to userspace. * This function is undone by calling iommufd_device_detach(). */ -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { int rc; - rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach); + rc = iommufd_device_change_pt(idev, pasid, pt_id, + &iommufd_device_do_attach); if (rc) return rc; @@ -816,8 +967,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); /** - * iommufd_device_replace - Change the device's iommu_domain + * iommufd_device_replace - Change the device/pasid's iommu_domain * @idev: device to change + * @pasid: pasid to change * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING * Output the IOMMUFD_OBJ_HWPT_PAGING ID * @@ -828,27 +980,33 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD"); * * If it fails then no change is made to the attachment. The iommu driver may * implement this so there is no disruption in translation. This can only be - * called if iommufd_device_attach() has already succeeded. + * called if iommufd_device_attach() has already succeeded. @pasid is + * IOMMU_NO_PASID for no pasid usage. */ -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id) { - return iommufd_device_change_pt(idev, pt_id, + return iommufd_device_change_pt(idev, pasid, pt_id, &iommufd_device_do_replace); } EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD"); /** - * iommufd_device_detach - Disconnect a device to an iommu_domain + * iommufd_device_detach - Disconnect a device/device to an iommu_domain * @idev: device to detach + * @pasid: pasid to detach * * Undo iommufd_device_attach(). This disconnects the idev from the previously * attached pt_id. The device returns back to a blocked DMA translation. + * @pasid is IOMMU_NO_PASID for no pasid usage. */ -void iommufd_device_detach(struct iommufd_device *idev) +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid) { struct iommufd_hw_pagetable *hwpt; - hwpt = iommufd_hw_pagetable_detach(idev); + hwpt = iommufd_hw_pagetable_detach(idev, pasid); + if (!hwpt) + return; iommufd_hw_pagetable_put(idev->ictx, hwpt); refcount_dec(&idev->obj.users); } @@ -1304,7 +1462,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || + cmd->__reserved[2]) return -EOPNOTSUPP; idev = iommufd_get_device(ucmd, cmd->dev_id); @@ -1361,6 +1520,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + cmd->out_max_pasid_log2 = 0; + /* + * Currently, all iommu drivers enable PASID in the probe_device() + * op if iommu and device supports it. So the max_pasids stored in + * dev->iommu indicates both PASID support and enable status. A + * non-zero dev->iommu->max_pasids means PASID is supported and + * enabled. The iommufd only reports PASID capability to userspace + * if it's enabled. + */ + if (idev->dev->iommu->max_pasids) { + cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids); + + if (dev_is_pci(idev->dev)) { + struct pci_dev *pdev = to_pci_dev(idev->dev); + int ctrl; + + ctrl = pci_pasid_status(pdev); + + WARN_ON_ONCE(ctrl < 0 || + !(ctrl & PCI_PASID_CTRL_ENABLE)); + + if (ctrl & PCI_PASID_CTRL_EXEC) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_EXEC; + if (ctrl & PCI_PASID_CTRL_PRIV) + cmd->out_capabilities |= + IOMMU_HW_CAP_PCI_PASID_PRIV; + } + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 2d98b04ff1cb..922cd1fe7ec2 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -49,5 +49,203 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); +/* Return -ENOENT if device is not associated to the vIOMMU */ +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id) +{ + struct iommufd_vdevice *vdev; + unsigned long index; + int rc = -ENOENT; + + if (WARN_ON_ONCE(!vdev_id)) + return -EINVAL; + + xa_lock(&viommu->vdevs); + xa_for_each(&viommu->vdevs, index, vdev) { + if (vdev->dev == dev) { + *vdev_id = vdev->id; + rc = 0; + break; + } + } + xa_unlock(&viommu->vdevs); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); + +/* + * Typically called in driver's threaded IRQ handler. + * The @type and @event_data must be defined in include/uapi/linux/iommufd.h + */ +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len) +{ + struct iommufd_veventq *veventq; + struct iommufd_vevent *vevent; + int rc = 0; + + if (WARN_ON_ONCE(!data_len || !event_data)) + return -EINVAL; + + down_read(&viommu->veventqs_rwsem); + + veventq = iommufd_viommu_find_veventq(viommu, type); + if (!veventq) { + rc = -EOPNOTSUPP; + goto out_unlock_veventqs; + } + + spin_lock(&veventq->common.lock); + if (veventq->num_events == veventq->depth) { + vevent = &veventq->lost_events_header; + goto out_set_header; + } + + vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); + if (!vevent) { + rc = -ENOMEM; + vevent = &veventq->lost_events_header; + goto out_set_header; + } + memcpy(vevent->event_data, event_data, data_len); + vevent->data_len = data_len; + veventq->num_events++; + +out_set_header: + iommufd_vevent_handler(veventq, vevent); + spin_unlock(&veventq->common.lock); +out_unlock_veventqs: + up_read(&viommu->veventqs_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); + +#ifdef CONFIG_IRQ_MSI_IOMMU +/* + * Get a iommufd_sw_msi_map for the msi physical address requested by the irq + * layer. The mapping to IOVA is global to the iommufd file descriptor, every + * domain that is attached to a device using the same MSI parameters will use + * the same IOVA. + */ +static struct iommufd_sw_msi_map * +iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, + phys_addr_t sw_msi_start) +{ + struct iommufd_sw_msi_map *cur; + unsigned int max_pgoff = 0; + + lockdep_assert_held(&ictx->sw_msi_lock); + + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + if (cur->sw_msi_start != sw_msi_start) + continue; + max_pgoff = max(max_pgoff, cur->pgoff + 1); + if (cur->msi_addr == msi_addr) + return cur; + } + + if (ictx->sw_msi_id >= + BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) + return ERR_PTR(-EOVERFLOW); + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) + return ERR_PTR(-ENOMEM); + + cur->sw_msi_start = sw_msi_start; + cur->msi_addr = msi_addr; + cur->pgoff = max_pgoff; + cur->id = ictx->sw_msi_id++; + list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); + return cur; +} + +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map) +{ + unsigned long iova; + + lockdep_assert_held(&ictx->sw_msi_lock); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { + int rc; + + rc = iommu_map(hwpt_paging->common.domain, iova, + msi_map->msi_addr, PAGE_SIZE, + IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, + GFP_KERNEL_ACCOUNT); + if (rc) + return rc; + __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); + } + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL"); + +/* + * Called by the irq code if the platform translates the MSI address through the + * IOMMU. msi_addr is the physical address of the MSI page. iommufd will + * allocate a fd global iova for the physical page that is the same on all + * domains and devices. + */ +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommufd_hwpt_paging *hwpt_paging; + struct iommu_attach_handle *raw_handle; + struct iommufd_attach_handle *handle; + struct iommufd_sw_msi_map *msi_map; + struct iommufd_ctx *ictx; + unsigned long iova; + int rc; + + /* + * It is safe to call iommu_attach_handle_get() here because the iommu + * core code invokes this under the group mutex which also prevents any + * change of the attach handle for the duration of this function. + */ + iommu_group_mutex_assert(dev); + + raw_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(raw_handle)) + return 0; + hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); + + handle = to_iommufd_handle(raw_handle); + /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ + if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; + + ictx = handle->idev->ictx; + guard(mutex)(&ictx->sw_msi_lock); + /* + * The input msi_addr is the exact byte offset of the MSI doorbell, we + * assume the caller has checked that it is contained with a MMIO region + * that is secure to map at PAGE_SIZE. + */ + msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, + msi_addr & PAGE_MASK, + handle->idev->igroup->sw_msi_start); + if (IS_ERR(msi_map)) + return PTR_ERR(msi_map); + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); + if (rc) + return rc; + __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD"); +#endif + MODULE_DESCRIPTION("iommufd code shared with builtin modules"); +MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c new file mode 100644 index 000000000000..e373b9eec7f5 --- /dev/null +++ b/drivers/iommu/iommufd/eventq.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/iommufd.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/poll.h> +#include <uapi/linux/iommufd.h> + +#include "../iommu-priv.h" +#include "iommufd_private.h" + +/* IOMMUFD_OBJ_FAULT Functions */ +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) +{ + struct iommufd_fault *fault = hwpt->fault; + struct iopf_group *group, *next; + struct list_head free_list; + unsigned long index; + + if (!fault || !handle) + return; + INIT_LIST_HEAD(&free_list); + + mutex_lock(&fault->mutex); + spin_lock(&fault->common.lock); + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { + if (group->attach_handle != &handle->handle) + continue; + list_move(&group->node, &free_list); + } + spin_unlock(&fault->common.lock); + + list_for_each_entry_safe(group, next, &free_list, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + + xa_for_each(&fault->response, index, group) { + if (group->attach_handle != &handle->handle) + continue; + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + mutex_unlock(&fault->mutex); +} + +void iommufd_fault_destroy(struct iommufd_object *obj) +{ + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iopf_group *group, *next; + unsigned long index; + + /* + * The iommufd object's reference count is zero at this point. + * We can be confident that no other threads are currently + * accessing this pointer. Therefore, acquiring the mutex here + * is unnecessary. + */ + list_for_each_entry_safe(group, next, &fault->common.deliver, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_for_each(&fault->response, index, group) { + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_destroy(&fault->response); + mutex_destroy(&fault->mutex); +} + +static void iommufd_compose_fault_message(struct iommu_fault *fault, + struct iommu_hwpt_pgfault *hwpt_fault, + struct iommufd_device *idev, + u32 cookie) +{ + hwpt_fault->flags = fault->prm.flags; + hwpt_fault->dev_id = idev->obj.id; + hwpt_fault->pasid = fault->prm.pasid; + hwpt_fault->grpid = fault->prm.grpid; + hwpt_fault->perm = fault->prm.perm; + hwpt_fault->addr = fault->prm.addr; + hwpt_fault->length = 0; + hwpt_fault->cookie = cookie; +} + +/* Fetch the first node out of the fault->deliver list */ +static struct iopf_group * +iommufd_fault_deliver_fetch(struct iommufd_fault *fault) +{ + struct list_head *list = &fault->common.deliver; + struct iopf_group *group = NULL; + + spin_lock(&fault->common.lock); + if (!list_empty(list)) { + group = list_first_entry(list, struct iopf_group, node); + list_del(&group->node); + } + spin_unlock(&fault->common.lock); + return group; +} + +/* Restore a node back to the head of the fault->deliver list */ +static void iommufd_fault_deliver_restore(struct iommufd_fault *fault, + struct iopf_group *group) +{ + spin_lock(&fault->common.lock); + list_add(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); +} + +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iommu_hwpt_pgfault data = {}; + struct iommufd_device *idev; + struct iopf_group *group; + struct iopf_fault *iopf; + size_t done = 0; + int rc = 0; + + if (*ppos || count % fault_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while ((group = iommufd_fault_deliver_fetch(fault))) { + if (done >= count || + group->fault_count * fault_size > count - done) { + iommufd_fault_deliver_restore(fault, group); + break; + } + + rc = xa_alloc(&fault->response, &group->cookie, group, + xa_limit_32b, GFP_KERNEL); + if (rc) { + iommufd_fault_deliver_restore(fault, group); + break; + } + + idev = to_iommufd_handle(group->attach_handle)->idev; + list_for_each_entry(iopf, &group->faults, list) { + iommufd_compose_fault_message(&iopf->fault, + &data, idev, + group->cookie); + if (copy_to_user(buf + done, &data, fault_size)) { + xa_erase(&fault->response, group->cookie); + iommufd_fault_deliver_restore(fault, group); + rc = -EFAULT; + break; + } + done += fault_size; + } + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t response_size = sizeof(struct iommu_hwpt_page_response); + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_fault *fault = eventq_to_fault(eventq); + struct iommu_hwpt_page_response response; + struct iopf_group *group; + size_t done = 0; + int rc = 0; + + if (*ppos || count % response_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (count > done) { + rc = copy_from_user(&response, buf + done, response_size); + if (rc) + break; + + static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == + (int)IOMMU_PAGE_RESP_SUCCESS); + static_assert((int)IOMMUFD_PAGE_RESP_INVALID == + (int)IOMMU_PAGE_RESP_INVALID); + if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && + response.code != IOMMUFD_PAGE_RESP_INVALID) { + rc = -EINVAL; + break; + } + + group = xa_erase(&fault->response, response.cookie); + if (!group) { + rc = -EINVAL; + break; + } + + iopf_group_response(group, response.code); + iopf_free_group(group); + done += response_size; + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +/* IOMMUFD_OBJ_VEVENTQ Functions */ + +void iommufd_veventq_abort(struct iommufd_object *obj) +{ + struct iommufd_eventq *eventq = + container_of(obj, struct iommufd_eventq, obj); + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_viommu *viommu = veventq->viommu; + struct iommufd_vevent *cur, *next; + + lockdep_assert_held_write(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(cur, next, &eventq->deliver, node) { + list_del(&cur->node); + if (cur != &veventq->lost_events_header) + kfree(cur); + } + + refcount_dec(&viommu->obj.users); + list_del(&veventq->node); +} + +void iommufd_veventq_destroy(struct iommufd_object *obj) +{ + struct iommufd_veventq *veventq = eventq_to_veventq( + container_of(obj, struct iommufd_eventq, obj)); + + down_write(&veventq->viommu->veventqs_rwsem); + iommufd_veventq_abort(obj); + up_write(&veventq->viommu->veventqs_rwsem); +} + +static struct iommufd_vevent * +iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + struct iommufd_vevent *vevent = NULL; + + spin_lock(&eventq->lock); + if (!list_empty(list)) { + struct iommufd_vevent *next; + + next = list_first_entry(list, struct iommufd_vevent, node); + /* Make a copy of the lost_events_header for copy_to_user */ + if (next == &veventq->lost_events_header) { + vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC); + if (!vevent) + goto out_unlock; + } + list_del(&next->node); + if (vevent) + memcpy(vevent, next, sizeof(*vevent)); + else + vevent = next; + } +out_unlock: + spin_unlock(&eventq->lock); + return vevent; +} + +static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) +{ + struct iommufd_eventq *eventq = &veventq->common; + struct list_head *list = &eventq->deliver; + + spin_lock(&eventq->lock); + if (vevent_for_lost_events_header(vevent)) { + /* Remove the copy of the lost_events_header */ + kfree(vevent); + vevent = NULL; + /* An empty list needs the lost_events_header back */ + if (list_empty(list)) + vevent = &veventq->lost_events_header; + } + if (vevent) + list_add(&vevent->node, list); + spin_unlock(&eventq->lock); +} + +static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + struct iommufd_eventq *eventq = filep->private_data; + struct iommufd_veventq *veventq = eventq_to_veventq(eventq); + struct iommufd_vevent_header *hdr; + struct iommufd_vevent *cur; + size_t done = 0; + int rc = 0; + + if (*ppos) + return -ESPIPE; + + while ((cur = iommufd_veventq_deliver_fetch(veventq))) { + /* Validate the remaining bytes against the header size */ + if (done >= count || sizeof(*hdr) > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + hdr = &cur->header; + + /* If being a normal vEVENT, validate against the full size */ + if (!vevent_for_lost_events_header(cur) && + sizeof(hdr) + cur->data_len > count - done) { + iommufd_veventq_deliver_restore(veventq, cur); + break; + } + + if (copy_to_user(buf + done, hdr, sizeof(*hdr))) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + done += sizeof(*hdr); + + if (cur->data_len && + copy_to_user(buf + done, cur->event_data, cur->data_len)) { + iommufd_veventq_deliver_restore(veventq, cur); + rc = -EFAULT; + break; + } + spin_lock(&eventq->lock); + if (!vevent_for_lost_events_header(cur)) + veventq->num_events--; + spin_unlock(&eventq->lock); + done += cur->data_len; + kfree(cur); + } + + return done == 0 ? rc : done; +} + +/* Common Event Queue Functions */ + +static __poll_t iommufd_eventq_fops_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct iommufd_eventq *eventq = filep->private_data; + __poll_t pollflags = 0; + + if (eventq->obj.type == IOMMUFD_OBJ_FAULT) + pollflags |= EPOLLOUT; + + poll_wait(filep, &eventq->wait_queue, wait); + spin_lock(&eventq->lock); + if (!list_empty(&eventq->deliver)) + pollflags |= EPOLLIN | EPOLLRDNORM; + spin_unlock(&eventq->lock); + + return pollflags; +} + +static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep) +{ + struct iommufd_eventq *eventq = filep->private_data; + + refcount_dec(&eventq->obj.users); + iommufd_ctx_put(eventq->ictx); + return 0; +} + +#define INIT_EVENTQ_FOPS(read_op, write_op) \ + ((const struct file_operations){ \ + .owner = THIS_MODULE, \ + .open = nonseekable_open, \ + .read = read_op, \ + .write = write_op, \ + .poll = iommufd_eventq_fops_poll, \ + .release = iommufd_eventq_fops_release, \ + }) + +static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name, + struct iommufd_ctx *ictx, + const struct file_operations *fops) +{ + struct file *filep; + int fdno; + + spin_lock_init(&eventq->lock); + INIT_LIST_HEAD(&eventq->deliver); + init_waitqueue_head(&eventq->wait_queue); + + filep = anon_inode_getfile(name, fops, eventq, O_RDWR); + if (IS_ERR(filep)) + return PTR_ERR(filep); + + eventq->ictx = ictx; + iommufd_ctx_get(eventq->ictx); + eventq->filep = filep; + refcount_inc(&eventq->obj.users); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) + fput(filep); + return fdno; +} + +static const struct file_operations iommufd_fault_fops = + INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write); + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_fault_alloc *cmd = ucmd->cmd; + struct iommufd_fault *fault; + int fdno; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT, + common.obj); + if (IS_ERR(fault)) + return PTR_ERR(fault); + + xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); + mutex_init(&fault->mutex); + + fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]", + ucmd->ictx, &iommufd_fault_fops); + if (fdno < 0) { + rc = fdno; + goto out_abort; + } + + cmd->out_fault_id = fault->common.obj.id; + cmd->out_fault_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + iommufd_object_finalize(ucmd->ictx, &fault->common.obj); + + fd_install(fdno, fault->common.filep); + + return 0; +out_put_fdno: + put_unused_fd(fdno); + fput(fault->common.filep); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj); + + return rc; +} + +int iommufd_fault_iopf_handler(struct iopf_group *group) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_fault *fault; + + hwpt = group->attach_handle->domain->iommufd_hwpt; + fault = hwpt->fault; + + spin_lock(&fault->common.lock); + list_add_tail(&group->node, &fault->common.deliver); + spin_unlock(&fault->common.lock); + + wake_up_interruptible(&fault->common.wait_queue); + + return 0; +} + +static const struct file_operations iommufd_veventq_fops = + INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL); + +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_veventq_alloc *cmd = ucmd->cmd; + struct iommufd_veventq *veventq; + struct iommufd_viommu *viommu; + int fdno; + int rc; + + if (cmd->flags || cmd->__reserved || + cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->veventq_depth) + return -EINVAL; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + down_write(&viommu->veventqs_rwsem); + + if (iommufd_viommu_find_veventq(viommu, cmd->type)) { + rc = -EEXIST; + goto out_unlock_veventqs; + } + + veventq = __iommufd_object_alloc(ucmd->ictx, veventq, + IOMMUFD_OBJ_VEVENTQ, common.obj); + if (IS_ERR(veventq)) { + rc = PTR_ERR(veventq); + goto out_unlock_veventqs; + } + + veventq->type = cmd->type; + veventq->viommu = viommu; + refcount_inc(&viommu->obj.users); + veventq->depth = cmd->veventq_depth; + list_add_tail(&veventq->node, &viommu->veventqs); + veventq->lost_events_header.header.flags = + IOMMU_VEVENTQ_FLAG_LOST_EVENTS; + + fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]", + ucmd->ictx, &iommufd_veventq_fops); + if (fdno < 0) { + rc = fdno; + goto out_abort; + } + + cmd->out_veventq_id = veventq->common.obj.id; + cmd->out_veventq_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + + iommufd_object_finalize(ucmd->ictx, &veventq->common.obj); + fd_install(fdno, veventq->common.filep); + goto out_unlock_veventqs; + +out_put_fdno: + put_unused_fd(fdno); + fput(veventq->common.filep); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj); +out_unlock_veventqs: + up_write(&viommu->veventqs_rwsem); + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c deleted file mode 100644 index cb844e6799d4..000000000000 --- a/drivers/iommu/iommufd/fault.c +++ /dev/null @@ -1,462 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright (C) 2024 Intel Corporation - */ -#define pr_fmt(fmt) "iommufd: " fmt - -#include <linux/anon_inodes.h> -#include <linux/file.h> -#include <linux/fs.h> -#include <linux/iommufd.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/pci.h> -#include <linux/pci-ats.h> -#include <linux/poll.h> -#include <uapi/linux/iommufd.h> - -#include "../iommu-priv.h" -#include "iommufd_private.h" - -int iommufd_fault_iopf_enable(struct iommufd_device *idev) -{ - struct device *dev = idev->dev; - int ret; - - /* - * Once we turn on PCI/PRI support for VF, the response failure code - * should not be forwarded to the hardware due to PRI being a shared - * resource between PF and VFs. There is no coordination for this - * shared capability. This waits for a vPRI reset to recover. - */ - if (dev_is_pci(dev)) { - struct pci_dev *pdev = to_pci_dev(dev); - - if (pdev->is_virtfn && pci_pri_supported(pdev)) - return -EINVAL; - } - - mutex_lock(&idev->iopf_lock); - /* Device iopf has already been on. */ - if (++idev->iopf_enabled > 1) { - mutex_unlock(&idev->iopf_lock); - return 0; - } - - ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF); - if (ret) - --idev->iopf_enabled; - mutex_unlock(&idev->iopf_lock); - - return ret; -} - -void iommufd_fault_iopf_disable(struct iommufd_device *idev) -{ - mutex_lock(&idev->iopf_lock); - if (!WARN_ON(idev->iopf_enabled == 0)) { - if (--idev->iopf_enabled == 0) - iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF); - } - mutex_unlock(&idev->iopf_lock); -} - -static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - int ret; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); - if (ret) - kfree(handle); - - return ret; -} - -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - int ret; - - if (!hwpt->fault) - return -EINVAL; - - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - - ret = __fault_domain_attach_dev(hwpt, idev); - if (ret) - iommufd_fault_iopf_disable(idev); - - return ret; -} - -void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, - struct iommufd_attach_handle *handle) -{ - struct iommufd_fault *fault = hwpt->fault; - struct iopf_group *group, *next; - struct list_head free_list; - unsigned long index; - - if (!fault) - return; - INIT_LIST_HEAD(&free_list); - - mutex_lock(&fault->mutex); - spin_lock(&fault->lock); - list_for_each_entry_safe(group, next, &fault->deliver, node) { - if (group->attach_handle != &handle->handle) - continue; - list_move(&group->node, &free_list); - } - spin_unlock(&fault->lock); - - list_for_each_entry_safe(group, next, &free_list, node) { - list_del(&group->node); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - - xa_for_each(&fault->response, index, group) { - if (group->attach_handle != &handle->handle) - continue; - xa_erase(&fault->response, index); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - mutex_unlock(&fault->mutex); -} - -static struct iommufd_attach_handle * -iommufd_device_get_attach_handle(struct iommufd_device *idev) -{ - struct iommu_attach_handle *handle; - - handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); - if (IS_ERR(handle)) - return NULL; - - return to_iommufd_handle(handle); -} - -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - - handle = iommufd_device_get_attach_handle(idev); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); - iommufd_auto_response_faults(hwpt, handle); - iommufd_fault_iopf_disable(idev); - kfree(handle); -} - -static int __fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - struct iommufd_attach_handle *handle, *curr = NULL; - int ret; - - if (old->fault) - curr = iommufd_device_get_attach_handle(idev); - - if (hwpt->fault) { - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, &handle->handle); - } else { - ret = iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, NULL); - } - - if (!ret && curr) { - iommufd_auto_response_faults(old, curr); - kfree(curr); - } - - return ret; -} - -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - bool iopf_off = !hwpt->fault && old->fault; - bool iopf_on = hwpt->fault && !old->fault; - int ret; - - if (iopf_on) { - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - } - - ret = __fault_domain_replace_dev(idev, hwpt, old); - if (ret) { - if (iopf_on) - iommufd_fault_iopf_disable(idev); - return ret; - } - - if (iopf_off) - iommufd_fault_iopf_disable(idev); - - return 0; -} - -void iommufd_fault_destroy(struct iommufd_object *obj) -{ - struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); - struct iopf_group *group, *next; - unsigned long index; - - /* - * The iommufd object's reference count is zero at this point. - * We can be confident that no other threads are currently - * accessing this pointer. Therefore, acquiring the mutex here - * is unnecessary. - */ - list_for_each_entry_safe(group, next, &fault->deliver, node) { - list_del(&group->node); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - xa_for_each(&fault->response, index, group) { - xa_erase(&fault->response, index); - iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); - iopf_free_group(group); - } - xa_destroy(&fault->response); - mutex_destroy(&fault->mutex); -} - -static void iommufd_compose_fault_message(struct iommu_fault *fault, - struct iommu_hwpt_pgfault *hwpt_fault, - struct iommufd_device *idev, - u32 cookie) -{ - hwpt_fault->flags = fault->prm.flags; - hwpt_fault->dev_id = idev->obj.id; - hwpt_fault->pasid = fault->prm.pasid; - hwpt_fault->grpid = fault->prm.grpid; - hwpt_fault->perm = fault->prm.perm; - hwpt_fault->addr = fault->prm.addr; - hwpt_fault->length = 0; - hwpt_fault->cookie = cookie; -} - -static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, - size_t count, loff_t *ppos) -{ - size_t fault_size = sizeof(struct iommu_hwpt_pgfault); - struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_pgfault data = {}; - struct iommufd_device *idev; - struct iopf_group *group; - struct iopf_fault *iopf; - size_t done = 0; - int rc = 0; - - if (*ppos || count % fault_size) - return -ESPIPE; - - mutex_lock(&fault->mutex); - while ((group = iommufd_fault_deliver_fetch(fault))) { - if (done >= count || - group->fault_count * fault_size > count - done) { - iommufd_fault_deliver_restore(fault, group); - break; - } - - rc = xa_alloc(&fault->response, &group->cookie, group, - xa_limit_32b, GFP_KERNEL); - if (rc) { - iommufd_fault_deliver_restore(fault, group); - break; - } - - idev = to_iommufd_handle(group->attach_handle)->idev; - list_for_each_entry(iopf, &group->faults, list) { - iommufd_compose_fault_message(&iopf->fault, - &data, idev, - group->cookie); - if (copy_to_user(buf + done, &data, fault_size)) { - xa_erase(&fault->response, group->cookie); - iommufd_fault_deliver_restore(fault, group); - rc = -EFAULT; - break; - } - done += fault_size; - } - } - mutex_unlock(&fault->mutex); - - return done == 0 ? rc : done; -} - -static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, - size_t count, loff_t *ppos) -{ - size_t response_size = sizeof(struct iommu_hwpt_page_response); - struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_page_response response; - struct iopf_group *group; - size_t done = 0; - int rc = 0; - - if (*ppos || count % response_size) - return -ESPIPE; - - mutex_lock(&fault->mutex); - while (count > done) { - rc = copy_from_user(&response, buf + done, response_size); - if (rc) - break; - - static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS == - (int)IOMMU_PAGE_RESP_SUCCESS); - static_assert((int)IOMMUFD_PAGE_RESP_INVALID == - (int)IOMMU_PAGE_RESP_INVALID); - if (response.code != IOMMUFD_PAGE_RESP_SUCCESS && - response.code != IOMMUFD_PAGE_RESP_INVALID) { - rc = -EINVAL; - break; - } - - group = xa_erase(&fault->response, response.cookie); - if (!group) { - rc = -EINVAL; - break; - } - - iopf_group_response(group, response.code); - iopf_free_group(group); - done += response_size; - } - mutex_unlock(&fault->mutex); - - return done == 0 ? rc : done; -} - -static __poll_t iommufd_fault_fops_poll(struct file *filep, - struct poll_table_struct *wait) -{ - struct iommufd_fault *fault = filep->private_data; - __poll_t pollflags = EPOLLOUT; - - poll_wait(filep, &fault->wait_queue, wait); - spin_lock(&fault->lock); - if (!list_empty(&fault->deliver)) - pollflags |= EPOLLIN | EPOLLRDNORM; - spin_unlock(&fault->lock); - - return pollflags; -} - -static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) -{ - struct iommufd_fault *fault = filep->private_data; - - refcount_dec(&fault->obj.users); - iommufd_ctx_put(fault->ictx); - return 0; -} - -static const struct file_operations iommufd_fault_fops = { - .owner = THIS_MODULE, - .open = nonseekable_open, - .read = iommufd_fault_fops_read, - .write = iommufd_fault_fops_write, - .poll = iommufd_fault_fops_poll, - .release = iommufd_fault_fops_release, -}; - -int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) -{ - struct iommu_fault_alloc *cmd = ucmd->cmd; - struct iommufd_fault *fault; - struct file *filep; - int fdno; - int rc; - - if (cmd->flags) - return -EOPNOTSUPP; - - fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); - if (IS_ERR(fault)) - return PTR_ERR(fault); - - fault->ictx = ucmd->ictx; - INIT_LIST_HEAD(&fault->deliver); - xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); - mutex_init(&fault->mutex); - spin_lock_init(&fault->lock); - init_waitqueue_head(&fault->wait_queue); - - filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, - fault, O_RDWR); - if (IS_ERR(filep)) { - rc = PTR_ERR(filep); - goto out_abort; - } - - refcount_inc(&fault->obj.users); - iommufd_ctx_get(fault->ictx); - fault->filep = filep; - - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - rc = fdno; - goto out_fput; - } - - cmd->out_fault_id = fault->obj.id; - cmd->out_fault_fd = fdno; - - rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); - if (rc) - goto out_put_fdno; - iommufd_object_finalize(ucmd->ictx, &fault->obj); - - fd_install(fdno, fault->filep); - - return 0; -out_put_fdno: - put_unused_fd(fdno); -out_fput: - fput(filep); -out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); - - return rc; -} - -int iommufd_fault_iopf_handler(struct iopf_group *group) -{ - struct iommufd_hw_pagetable *hwpt; - struct iommufd_fault *fault; - - hwpt = group->attach_handle->domain->fault_data; - fault = hwpt->fault; - - spin_lock(&fault->lock); - list_add_tail(&group->node, &fault->deliver); - spin_unlock(&fault->lock); - - wake_up_interruptible(&fault->wait_queue); - - return 0; -} diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 598be26a14e2..487779470261 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt) iommu_domain_free(hwpt->domain); if (hwpt->fault) - refcount_dec(&hwpt->fault->obj.users); + refcount_dec(&hwpt->fault->common.obj.users); } void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) @@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) * @ictx: iommufd context * @ioas: IOAS to associate the domain with * @idev: Device to get an iommu_domain for + * @pasid: PASID to get an iommu_domain for * @flags: Flags from userspace * @immediate_attach: True if idev should be attached to the hwpt * @user_data: The user provided driver specific data describing the domain to @@ -105,13 +106,14 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) */ struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data) { const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_FAULT_ID_VALID; + IOMMU_HWPT_FAULT_ID_VALID | + IOMMU_HWPT_ALLOC_PASID; const struct iommu_ops *ops = dev_iommu_ops(idev->dev); struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; @@ -126,12 +128,16 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_FAULT_ID_VALID) && + (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); if (IS_ERR(hwpt_paging)) return ERR_CAST(hwpt_paging); hwpt = &hwpt_paging->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; INIT_LIST_HEAD(&hwpt_paging->hwpt_item); /* Pairs with iommufd_hw_pagetable_destroy() */ @@ -156,6 +162,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } } + hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; /* * Set the coherency mode before we do iopt_table_add_domain() as some @@ -184,7 +192,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, * sequence. Once those drivers are fixed this should be removed. */ if (immediate_attach) { - rc = iommufd_hw_pagetable_attach(hwpt, idev); + rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid); if (rc) goto out_abort; } @@ -197,7 +205,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, out_detach: if (immediate_attach) - iommufd_hw_pagetable_detach(idev); + iommufd_hw_pagetable_detach(idev, pasid); out_abort: iommufd_object_abort_and_destroy(ictx, &hwpt->obj); return ERR_PTR(rc); @@ -226,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) || !user_data->len || !ops->domain_alloc_nested) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent || @@ -238,6 +246,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; refcount_inc(&parent->common.obj.users); hwpt_nested->parent = parent; @@ -251,6 +260,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, goto out_abort; } hwpt->domain->owner = ops; + hwpt->domain->iommufd_hwpt = hwpt; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -280,7 +291,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, struct iommufd_hw_pagetable *hwpt; int rc; - if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) return ERR_PTR(-EOPNOTSUPP); if (!user_data->len) return ERR_PTR(-EOPNOTSUPP); @@ -292,6 +303,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, if (IS_ERR(hwpt_nested)) return ERR_CAST(hwpt_nested); hwpt = &hwpt_nested->common; + hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID; hwpt_nested->viommu = viommu; refcount_inc(&viommu->obj.users); @@ -306,7 +318,9 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt->domain = NULL; goto out_abort; } + hwpt->domain->iommufd_hwpt = hwpt; hwpt->domain->owner = viommu->iommu_dev->ops; + hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -355,8 +369,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) ioas = container_of(pt_obj, struct iommufd_ioas, obj); mutex_lock(&ioas->mutex); hwpt_paging = iommufd_hwpt_paging_alloc( - ucmd->ictx, ioas, idev, cmd->flags, false, - user_data.len ? &user_data : NULL); + ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags, + false, user_data.len ? &user_data : NULL); if (IS_ERR(hwpt_paging)) { rc = PTR_ERR(hwpt_paging); goto out_unlock; @@ -406,9 +420,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) } hwpt->fault = fault; hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; - hwpt->domain->fault_data = hwpt; - refcount_inc(&fault->obj.users); - iommufd_put_object(ucmd->ictx, &fault->obj); + refcount_inc(&fault->common.obj.users); + iommufd_put_object(ucmd->ictx, &fault->common.obj); } cmd->out_hwpt_id = hwpt->obj.id; diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 8a790e597e12..c3a316fd1ef9 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) return iter->area; } -static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, - unsigned long length, - unsigned long iova_alignment, - unsigned long page_offset) +static bool __alloc_iova_check_range(unsigned long *start, unsigned long last, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) { - if (span->is_used || span->last_hole - span->start_hole < length - 1) + unsigned long aligned_start; + + /* ALIGN_UP() */ + if (check_add_overflow(*start, iova_alignment - 1, &aligned_start)) return false; + aligned_start &= ~(iova_alignment - 1); + aligned_start |= page_offset; - span->start_hole = ALIGN(span->start_hole, iova_alignment) | - page_offset; - if (span->start_hole > span->last_hole || - span->last_hole - span->start_hole < length - 1) + if (aligned_start >= last || last - aligned_start < length - 1) return false; + *start = aligned_start; return true; } -static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { - if (span->is_hole || span->last_used - span->start_used < length - 1) + if (span->is_used) return false; + return __alloc_iova_check_range(&span->start_hole, span->last_hole, + length, iova_alignment, page_offset); +} - span->start_used = ALIGN(span->start_used, iova_alignment) | - page_offset; - if (span->start_used > span->last_used || - span->last_used - span->start_used < length - 1) +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole) return false; - return true; + return __alloc_iova_check_range(&span->start_used, span->last_used, + length, iova_alignment, page_offset); } /* @@ -743,8 +752,10 @@ again: iommufd_access_notify_unmap(iopt, area_first, length); /* Something is not responding to unmap requests. */ tries++; - if (WARN_ON(tries > 100)) - return -EDEADLOCK; + if (WARN_ON(tries > 100)) { + rc = -EDEADLOCK; + goto out_unmapped; + } goto again; } @@ -766,6 +777,7 @@ again: out_unlock_iova: up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); +out_unmapped: if (unmapped) *unmapped = unmapped_bytes; return rc; diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 02fe1ada97cc..9ccc83341f32 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -19,6 +19,25 @@ struct iommu_group; struct iommu_option; struct iommufd_device; +struct iommufd_sw_msi_map { + struct list_head sw_msi_item; + phys_addr_t sw_msi_start; + phys_addr_t msi_addr; + unsigned int pgoff; + unsigned int id; +}; + +/* Bitmap of struct iommufd_sw_msi_map::id */ +struct iommufd_sw_msi_maps { + DECLARE_BITMAP(bitmap, 64); +}; + +#ifdef CONFIG_IRQ_MSI_IOMMU +int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map); +#endif + struct iommufd_ctx { struct file *file; struct xarray objects; @@ -26,6 +45,10 @@ struct iommufd_ctx { wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct mutex sw_msi_lock; + struct list_head sw_msi_list; + unsigned int sw_msi_id; + u8 account_mode; /* Compatibility with VFIO no iommu */ u8 no_iommu_mode; @@ -276,6 +299,7 @@ struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; struct iommufd_fault *fault; + bool pasid_compat : 1; }; struct iommufd_hwpt_paging { @@ -283,10 +307,10 @@ struct iommufd_hwpt_paging { struct iommufd_ioas *ioas; bool auto_domain : 1; bool enforce_cache_coherency : 1; - bool msi_cookie : 1; bool nest_parent : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; + struct iommufd_sw_msi_maps present_sw_msi; }; struct iommufd_hwpt_nested { @@ -346,13 +370,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, u32 flags, - bool immediate_attach, + struct iommufd_device *idev, ioasid_t pasid, + u32 flags, bool immediate_attach, const struct iommu_user_data *user_data); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); + struct iommufd_device *idev, ioasid_t pasid); struct iommufd_hw_pagetable * -iommufd_hw_pagetable_detach(struct iommufd_device *idev); +iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid); void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); @@ -376,13 +400,15 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, refcount_dec(&hwpt->obj.users); } +struct iommufd_attach; + struct iommufd_group { struct kref ref; struct mutex lock; struct iommufd_ctx *ictx; struct iommu_group *group; - struct iommufd_hw_pagetable *hwpt; - struct list_head device_list; + struct xarray pasid_attach; + struct iommufd_sw_msi_maps required_sw_msi; phys_addr_t sw_msi_start; }; @@ -399,9 +425,6 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; - /* protect iopf_enabled counter */ - struct mutex iopf_lock; - unsigned int iopf_enabled; }; static inline struct iommufd_device * @@ -433,49 +456,17 @@ void iopt_remove_access(struct io_pagetable *iopt, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); -/* - * An iommufd_fault object represents an interface to deliver I/O page faults - * to the user space. These objects are created/destroyed by the user space and - * associated with hardware page table objects during page-table allocation. - */ -struct iommufd_fault { +struct iommufd_eventq { struct iommufd_object obj; struct iommufd_ctx *ictx; struct file *filep; spinlock_t lock; /* protects the deliver list */ struct list_head deliver; - struct mutex mutex; /* serializes response flows */ - struct xarray response; struct wait_queue_head wait_queue; }; -/* Fetch the first node out of the fault->deliver list */ -static inline struct iopf_group * -iommufd_fault_deliver_fetch(struct iommufd_fault *fault) -{ - struct list_head *list = &fault->deliver; - struct iopf_group *group = NULL; - - spin_lock(&fault->lock); - if (!list_empty(list)) { - group = list_first_entry(list, struct iopf_group, node); - list_del(&group->node); - } - spin_unlock(&fault->lock); - return group; -} - -/* Restore a node back to the head of the fault->deliver list */ -static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault, - struct iopf_group *group) -{ - spin_lock(&fault->lock); - list_add(&group->node, &fault->deliver); - spin_unlock(&fault->lock); -} - struct iommufd_attach_handle { struct iommu_attach_handle handle; struct iommufd_device *idev; @@ -484,31 +475,105 @@ struct iommufd_attach_handle { /* Convert an iommu attach handle to iommufd handle. */ #define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. + */ +struct iommufd_fault { + struct iommufd_eventq common; + struct mutex mutex; /* serializes response flows */ + struct xarray response; +}; + +static inline struct iommufd_fault * +eventq_to_fault(struct iommufd_eventq *eventq) +{ + return container_of(eventq, struct iommufd_fault, common); +} + static inline struct iommufd_fault * iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_FAULT), - struct iommufd_fault, obj); + struct iommufd_fault, common.obj); } int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); void iommufd_fault_destroy(struct iommufd_object *obj); int iommufd_fault_iopf_handler(struct iopf_group *group); - -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old); - -int iommufd_fault_iopf_enable(struct iommufd_device *idev); -void iommufd_fault_iopf_disable(struct iommufd_device *idev); void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, struct iommufd_attach_handle *handle); +/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */ +struct iommufd_vevent { + struct iommufd_vevent_header header; + struct list_head node; /* for iommufd_eventq::deliver */ + ssize_t data_len; + u64 event_data[] __counted_by(data_len); +}; + +#define vevent_for_lost_events_header(vevent) \ + (vevent->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS) + +/* + * An iommufd_veventq object represents an interface to deliver vIOMMU events to + * the user space. It is created/destroyed by the user space and associated with + * a vIOMMU object during the allocations. + */ +struct iommufd_veventq { + struct iommufd_eventq common; + struct iommufd_viommu *viommu; + struct list_head node; /* for iommufd_viommu::veventqs */ + struct iommufd_vevent lost_events_header; + + unsigned int type; + unsigned int depth; + + /* Use common.lock for protection */ + u32 num_events; + u32 sequence; +}; + +static inline struct iommufd_veventq * +eventq_to_veventq(struct iommufd_eventq *eventq) +{ + return container_of(eventq, struct iommufd_veventq, common); +} + +static inline struct iommufd_veventq * +iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_VEVENTQ), + struct iommufd_veventq, common.obj); +} + +int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd); +void iommufd_veventq_destroy(struct iommufd_object *obj); +void iommufd_veventq_abort(struct iommufd_object *obj); + +static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq, + struct iommufd_vevent *vevent) +{ + struct iommufd_eventq *eventq = &veventq->common; + + lockdep_assert_held(&eventq->lock); + + /* + * Remove the lost_events_header and add the new node at the same time. + * Note the new node can be lost_events_header, for a sequence update. + */ + if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver)) + list_del(&veventq->lost_events_header.node); + list_add_tail(&vevent->node, &eventq->deliver); + vevent->header.sequence = veventq->sequence; + veventq->sequence = (veventq->sequence + 1) & INT_MAX; + + wake_up_interruptible(&eventq->wait_queue); +} + static inline struct iommufd_viommu * iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) { @@ -517,6 +582,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_viommu, obj); } +static inline struct iommufd_veventq * +iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type) +{ + struct iommufd_veventq *veventq, *next; + + lockdep_assert_held(&viommu->veventqs_rwsem); + + list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) { + if (veventq->type == type) + return veventq; + } + return NULL; +} + int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index a6b7a163f636..1cd7e8394129 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -24,6 +24,11 @@ enum { IOMMU_TEST_OP_MD_CHECK_IOTLB, IOMMU_TEST_OP_TRIGGER_IOPF, IOMMU_TEST_OP_DEV_CHECK_CACHE, + IOMMU_TEST_OP_TRIGGER_VEVENT, + IOMMU_TEST_OP_PASID_ATTACH, + IOMMU_TEST_OP_PASID_REPLACE, + IOMMU_TEST_OP_PASID_DETACH, + IOMMU_TEST_OP_PASID_CHECK_HWPT, }; enum { @@ -48,6 +53,7 @@ enum { enum { MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1, + MOCK_FLAGS_DEVICE_PASID = 1 << 2, }; enum { @@ -60,6 +66,9 @@ enum { MOCK_DEV_CACHE_NUM = 4, }; +/* Reserved for special pasid replace test */ +#define IOMMU_TEST_PASID_RESERVED 1024 + struct iommu_test_cmd { __u32 size; __u32 op; @@ -145,11 +154,36 @@ struct iommu_test_cmd { __u32 id; __u32 cache; } check_dev_cache; + struct { + __u32 dev_id; + } trigger_vevent; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_attach; + struct { + __u32 pasid; + __u32 pt_id; + /* @id is stdev_id */ + } pasid_replace; + struct { + __u32 pasid; + /* @id is stdev_id */ + } pasid_detach; + struct { + __u32 pasid; + __u32 hwpt_id; + /* @id is stdev_id */ + } pasid_check; }; __u32 last; }; #define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) +/* Mock device/iommu PASID width */ +#define MOCK_PASID_WIDTH 20 + /* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */ #define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef #define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef @@ -212,4 +246,10 @@ struct iommu_viommu_invalidate_selftest { __u32 cache_id; }; +#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef + +struct iommu_viommu_event_selftest { + __u32 virt_id; +}; + #endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index ccf616462a1c..3df468f64e7d 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -227,6 +227,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init(&ictx->groups); ictx->file = filp; init_waitqueue_head(&ictx->destroy_wait); + mutex_init(&ictx->sw_msi_lock); + INIT_LIST_HEAD(&ictx->sw_msi_list); filp->private_data = ictx; return 0; } @@ -234,6 +236,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) static int iommufd_fops_release(struct inode *inode, struct file *filp) { struct iommufd_ctx *ictx = filp->private_data; + struct iommufd_sw_msi_map *next; + struct iommufd_sw_msi_map *cur; struct iommufd_object *obj; /* @@ -262,6 +266,11 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) break; } WARN_ON(!xa_empty(&ictx->groups)); + + mutex_destroy(&ictx->sw_msi_lock); + list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item) + kfree(cur); + kfree(ictx); return 0; } @@ -308,6 +317,7 @@ union ucmd_buffer { struct iommu_ioas_unmap unmap; struct iommu_option option; struct iommu_vdevice_alloc vdev; + struct iommu_veventq_alloc veventq; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; #ifdef CONFIG_IOMMUFD_TEST @@ -363,6 +373,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, struct iommu_vdevice_alloc, virt_id), + IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc, + struct iommu_veventq_alloc, out_veventq_fd), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, @@ -505,6 +517,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, }, + [IOMMUFD_OBJ_VEVENTQ] = { + .destroy = iommufd_veventq_destroy, + .abort = iommufd_veventq_abort, + }, [IOMMUFD_OBJ_VIOMMU] = { .destroy = iommufd_viommu_destroy, }, diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d40deb0a4f06..6bd0abf9a641 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -58,6 +58,9 @@ enum { MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2, }; +static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain); +static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain); + /* * Syzkaller has trouble randomizing the correct iova to use since it is linked * to the map ioctl's output, and it has no ide about that. So, simplify things. @@ -161,9 +164,15 @@ enum selftest_obj_type { struct mock_dev { struct device dev; + struct mock_viommu *viommu; + struct rw_semaphore viommu_rwsem; unsigned long flags; + unsigned long vdev_id; int id; u32 cache[MOCK_DEV_CACHE_NUM]; + atomic_t pasid_1024_fake_error; + unsigned int iopf_refcount; + struct iommu_domain *domain; }; static inline struct mock_dev *to_mock_dev(struct device *dev) @@ -193,15 +202,85 @@ static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { struct mock_dev *mdev = to_mock_dev(dev); + struct mock_viommu *new_viommu = NULL; + unsigned long vdev_id = 0; + int rc; if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) return -EINVAL; + iommu_group_mutex_assert(dev); + if (domain->type == IOMMU_DOMAIN_NESTED) { + new_viommu = to_mock_nested(domain)->mock_viommu; + if (new_viommu) { + rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev, + &vdev_id); + if (rc) + return rc; + } + } + if (new_viommu != mdev->viommu) { + down_write(&mdev->viommu_rwsem); + mdev->viommu = new_viommu; + mdev->vdev_id = vdev_id; + up_write(&mdev->viommu_rwsem); + } + + rc = mock_dev_enable_iopf(dev, domain); + if (rc) + return rc; + + mock_dev_disable_iopf(dev, mdev->domain); + mdev->domain = domain; + + return 0; +} + +static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) +{ + struct mock_dev *mdev = to_mock_dev(dev); + int rc; + + /* + * Per the first attach with pasid 1024, set the + * mdev->pasid_1024_fake_error. Hence the second call of this op + * can fake an error to validate the error path of the core. This + * is helpful to test the case in which the iommu core needs to + * rollback to the old domain due to driver failure. e.g. replace. + * User should be careful about the third call of this op, it shall + * succeed since the mdev->pasid_1024_fake_error is cleared in the + * second call. + */ + if (pasid == 1024) { + if (domain->type == IOMMU_DOMAIN_BLOCKED) { + atomic_set(&mdev->pasid_1024_fake_error, 0); + } else if (atomic_read(&mdev->pasid_1024_fake_error)) { + /* + * Clear the flag, and fake an error to fail the + * replacement. + */ + atomic_set(&mdev->pasid_1024_fake_error, 0); + return -ENOMEM; + } else { + /* Set the flag to fake an error in next call */ + atomic_set(&mdev->pasid_1024_fake_error, 1); + } + } + + rc = mock_dev_enable_iopf(dev, domain); + if (rc) + return rc; + + mock_dev_disable_iopf(dev, old); + return 0; } static const struct iommu_domain_ops mock_blocking_ops = { .attach_dev = mock_domain_nop_attach, + .set_dev_pasid = mock_domain_set_dev_pasid_nop }; static struct iommu_domain mock_blocking_domain = { @@ -343,7 +422,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, struct mock_iommu_domain_nested *mock_nested; struct mock_iommu_domain *mock_parent; - if (flags) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); if (!parent || parent->ops != mock_ops.default_domain_ops) return ERR_PTR(-EINVAL); @@ -365,7 +444,8 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags, { bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | - IOMMU_HWPT_ALLOC_NEST_PARENT; + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_PASID; struct mock_dev *mdev = to_mock_dev(dev); bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY; struct mock_iommu_domain *mock; @@ -549,22 +629,42 @@ static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt { } -static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +static int mock_dev_enable_iopf(struct device *dev, struct iommu_domain *domain) { - if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + struct mock_dev *mdev = to_mock_dev(dev); + int ret; + + if (!domain || !domain->iopf_handler) + return 0; + + if (!mock_iommu_iopf_queue) return -ENODEV; - return iopf_queue_add_device(mock_iommu_iopf_queue, dev); + if (mdev->iopf_refcount) { + mdev->iopf_refcount++; + return 0; + } + + ret = iopf_queue_add_device(mock_iommu_iopf_queue, dev); + if (ret) + return ret; + + mdev->iopf_refcount = 1; + + return 0; } -static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +static void mock_dev_disable_iopf(struct device *dev, struct iommu_domain *domain) { - if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) - return -ENODEV; + struct mock_dev *mdev = to_mock_dev(dev); - iopf_queue_remove_device(mock_iommu_iopf_queue, dev); + if (!domain || !domain->iopf_handler) + return; - return 0; + if (--mdev->iopf_refcount) + return; + + iopf_queue_remove_device(mock_iommu_iopf_queue, dev); } static void mock_viommu_destroy(struct iommufd_viommu *viommu) @@ -585,7 +685,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, struct mock_viommu *mock_viommu = to_mock_viommu(viommu); struct mock_iommu_domain_nested *mock_nested; - if (flags) + if (flags & ~IOMMU_HWPT_ALLOC_PASID) return ERR_PTR(-EOPNOTSUPP); mock_nested = __mock_domain_alloc_nested(user_data); @@ -709,8 +809,6 @@ static const struct iommu_ops mock_ops = { .device_group = generic_device_group, .probe_device = mock_probe_device, .page_response = mock_domain_page_response, - .dev_enable_feat = mock_dev_enable_feat, - .dev_disable_feat = mock_dev_disable_feat, .user_pasid_table = true, .viommu_alloc = mock_viommu_alloc, .default_domain_ops = @@ -720,6 +818,7 @@ static const struct iommu_ops mock_ops = { .map_pages = mock_domain_map_pages, .unmap_pages = mock_domain_unmap_pages, .iova_to_phys = mock_domain_iova_to_phys, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }, }; @@ -780,6 +879,7 @@ static struct iommu_domain_ops domain_nested_ops = { .free = mock_domain_free_nested, .attach_dev = mock_domain_nop_attach, .cache_invalidate_user = mock_domain_cache_invalidate_user, + .set_dev_pasid = mock_domain_set_dev_pasid_nop, }; static inline struct iommufd_hw_pagetable * @@ -839,17 +939,24 @@ static void mock_dev_release(struct device *dev) static struct mock_dev *mock_dev_create(unsigned long dev_flags) { + struct property_entry prop[] = { + PROPERTY_ENTRY_U32("pasid-num-bits", 0), + {}, + }; + const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY | + MOCK_FLAGS_DEVICE_HUGE_IOVA | + MOCK_FLAGS_DEVICE_PASID; struct mock_dev *mdev; int rc, i; - if (dev_flags & - ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA)) + if (dev_flags & ~valid_flags) return ERR_PTR(-EINVAL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return ERR_PTR(-ENOMEM); + init_rwsem(&mdev->viommu_rwsem); device_initialize(&mdev->dev); mdev->flags = dev_flags; mdev->dev.release = mock_dev_release; @@ -866,6 +973,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) if (rc) goto err_put; + if (dev_flags & MOCK_FLAGS_DEVICE_PASID) + prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH); + + rc = device_create_managed_software_node(&mdev->dev, prop, NULL); + if (rc) { + dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc); + goto err_put; + } + rc = device_add(&mdev->dev); if (rc) goto err_put; @@ -921,7 +1037,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, } sobj->idev.idev = idev; - rc = iommufd_device_attach(idev, &pt_id); + rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id); if (rc) goto out_unbind; @@ -936,7 +1052,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, return 0; out_detach: - iommufd_device_detach(idev); + iommufd_device_detach(idev, IOMMU_NO_PASID); out_unbind: iommufd_device_unbind(idev); out_mdev: @@ -946,39 +1062,49 @@ out_sobj: return rc; } -/* Replace the mock domain with a manually allocated hw_pagetable */ -static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, - unsigned int device_id, u32 pt_id, - struct iommu_test_cmd *cmd) +static struct selftest_obj * +iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id) { struct iommufd_object *dev_obj; struct selftest_obj *sobj; - int rc; /* * Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure * it doesn't race with detach, which is not allowed. */ - dev_obj = - iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST); + dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST); if (IS_ERR(dev_obj)) - return PTR_ERR(dev_obj); + return ERR_CAST(dev_obj); sobj = to_selftest_obj(dev_obj); if (sobj->type != TYPE_IDEV) { - rc = -EINVAL; - goto out_dev_obj; + iommufd_put_object(ictx, dev_obj); + return ERR_PTR(-EINVAL); } + return sobj; +} - rc = iommufd_device_replace(sobj->idev.idev, &pt_id); +/* Replace the mock domain with a manually allocated hw_pagetable */ +static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, + unsigned int device_id, u32 pt_id, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id); if (rc) - goto out_dev_obj; + goto out_sobj; cmd->mock_domain_replace.pt_id = pt_id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); -out_dev_obj: - iommufd_put_object(ucmd->ictx, dev_obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); return rc; } @@ -1597,13 +1723,166 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, return 0; } +static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct iommu_viommu_event_selftest test = {}; + struct iommufd_device *idev; + struct mock_dev *mdev; + int rc = -ENOENT; + + idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + mdev = to_mock_dev(idev->dev); + + down_read(&mdev->viommu_rwsem); + if (!mdev->viommu || !mdev->vdev_id) + goto out_unlock; + + test.virt_id = mdev->vdev_id; + rc = iommufd_viommu_report_event(&mdev->viommu->core, + IOMMU_VEVENTQ_TYPE_SELFTEST, &test, + sizeof(test)); +out_unlock: + up_read(&mdev->viommu_rwsem); + iommufd_put_object(ucmd->ictx, &idev->obj); + + return rc; +} + +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) +{ + struct iommufd_object *pt_obj; + + pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) + return ERR_CAST(pt_obj); + + if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED && + pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) { + iommufd_put_object(ucmd->ictx, pt_obj); + return ERR_PTR(-EINVAL); + } + + return container_of(pt_obj, struct iommufd_hw_pagetable, obj); +} + +static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + u32 hwpt_id = cmd->pasid_check.hwpt_id; + struct iommu_domain *attached_domain; + struct iommu_attach_handle *handle; + struct iommufd_hw_pagetable *hwpt; + struct selftest_obj *sobj; + struct mock_dev *mdev; + int rc = 0; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + mdev = sobj->idev.mock_dev; + + handle = iommu_attach_handle_get(mdev->dev.iommu_group, + cmd->pasid_check.pasid, 0); + if (IS_ERR(handle)) + attached_domain = NULL; + else + attached_domain = handle->domain; + + /* hwpt_id == 0 means to check if pasid is detached */ + if (!hwpt_id) { + if (attached_domain) + rc = -EINVAL; + goto out_sobj; + } + + hwpt = iommufd_get_hwpt(ucmd, hwpt_id); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_sobj; + } + + if (attached_domain != hwpt->domain) + rc = -EINVAL; + + iommufd_put_object(ucmd->ictx, &hwpt->obj); +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + iommufd_device_detach(sobj->idev.idev, + cmd->pasid_attach.pasid); + +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + int rc; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_attach.pasid, + &cmd->pasid_attach.pt_id); + if (rc) + goto out_sobj; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_sobj: + iommufd_put_object(ucmd->ictx, &sobj->obj); + return rc; +} + +static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct selftest_obj *sobj; + + sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id); + if (IS_ERR(sobj)) + return PTR_ERR(sobj); + + iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid); + iommufd_put_object(ucmd->ictx, &sobj->obj); + return 0; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = to_selftest_obj(obj); switch (sobj->type) { case TYPE_IDEV: - iommufd_device_detach(sobj->idev.idev); + iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID); iommufd_device_unbind(sobj->idev.idev); mock_dev_destroy(sobj->idev.mock_dev); break; @@ -1678,6 +1957,16 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->dirty.flags); case IOMMU_TEST_OP_TRIGGER_IOPF: return iommufd_test_trigger_iopf(ucmd, cmd); + case IOMMU_TEST_OP_TRIGGER_VEVENT: + return iommufd_test_trigger_vevent(ucmd, cmd); + case IOMMU_TEST_OP_PASID_ATTACH: + return iommufd_test_pasid_attach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_REPLACE: + return iommufd_test_pasid_replace(ucmd, cmd); + case IOMMU_TEST_OP_PASID_DETACH: + return iommufd_test_pasid_detach(ucmd, cmd); + case IOMMU_TEST_OP_PASID_CHECK_HWPT: + return iommufd_test_pasid_check_hwpt(ucmd, cmd); default: return -EOPNOTSUPP; } @@ -1724,6 +2013,7 @@ int __init iommufd_test_init(void) init_completion(&mock_iommu.complete); mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq"); + mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH); return 0; diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 69b88e8c7c26..01df2b985f02 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) viommu->ictx = ucmd->ictx; viommu->hwpt = hwpt_paging; refcount_inc(&viommu->hwpt->common.obj.users); + INIT_LIST_HEAD(&viommu->veventqs); + init_rwsem(&viommu->veventqs_rwsem); /* * It is the most likely case that a physical IOMMU is unpluggable. A * pluggable IOMMU instance (if exists) is responsible for refcounting diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 074daf1aac4e..90341b24a811 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1081,31 +1081,25 @@ static int ipmmu_probe(struct platform_device *pdev) } } + platform_set_drvdata(pdev, mmu); /* * Register the IPMMU to the IOMMU subsystem in the following cases: * - R-Car Gen2 IPMMU (all devices registered) * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) */ - if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) - return ret; - - ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); - if (ret) - return ret; - } + if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu)) + return 0; - /* - * We can't create the ARM mapping here as it requires the bus to have - * an IOMMU, which only happens when bus_set_iommu() is called in - * ipmmu_init() after the probe function returns. - */ + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s", + dev_name(&pdev->dev)); + if (ret) + return ret; - platform_set_drvdata(pdev, mmu); + ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); + if (ret) + iommu_device_sysfs_remove(&mmu->iommu); - return 0; + return ret; } static void ipmmu_remove(struct platform_device *pdev) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index df98d0c65f54..cb95fecf6016 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1550,6 +1550,31 @@ static const struct mtk_iommu_plat_data mt6795_data = { .larbid_remap = {{0}, {1}, {2}, {3}, {4}}, /* Linear mapping. */ }; +static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0}, /* Region0: larb0/1 */ + [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ + [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */ + 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0, + ~0, ~0, ~0, ~0, ~0}, + [3] = {0}, + [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ + [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */ +}; + +static const struct mtk_iommu_plat_data mt6893_data = { + .m4u_plat = M4U_MT8192, + .flags = HAS_BCLK | OUT_ORDER_WR_EN | HAS_SUB_COMM_2BITS | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | MTK_IOMMU_TYPE_MM, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8192_larb_region_msk, + .larbid_remap = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20}, + {0, 14, 16}, {0, 13, 18, 17}}, +}; + static const struct mtk_iommu_plat_data mt8167_data = { .m4u_plat = M4U_MT8167, .flags = RESET_AXI | HAS_LEGACY_IVRP_PADDR | MTK_IOMMU_TYPE_MM, @@ -1673,17 +1698,6 @@ static const struct mtk_iommu_plat_data mt8188_data_vpp = { 27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}}, }; -static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { - [0] = {~0, ~0}, /* Region0: larb0/1 */ - [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ - [2] = {0, 0, ~0, 0, 0, 0, 0, 0, /* Region2: larb2/9/11/13/14/16/17/18/19/20 */ - 0, ~0, 0, ~0, 0, ~(u32)(BIT(9) | BIT(10)), ~(u32)(BIT(4) | BIT(5)), 0, - ~0, ~0, ~0, ~0, ~0}, - [3] = {0}, - [4] = {[13] = BIT(9) | BIT(10)}, /* larb13 port9/10 */ - [5] = {[14] = BIT(4) | BIT(5)}, /* larb14 port4/5 */ -}; - static const struct mtk_iommu_plat_data mt8192_data = { .m4u_plat = M4U_MT8192, .flags = HAS_BCLK | HAS_SUB_COMM_2BITS | OUT_ORDER_WR_EN | @@ -1777,6 +1791,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data}, { .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data}, { .compatible = "mediatek,mt6795-m4u", .data = &mt6795_data}, + { .compatible = "mediatek,mt6893-iommu-mm", .data = &mt6893_data}, { .compatible = "mediatek,mt8167-m4u", .data = &mt8167_data}, { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a565b9e40f4a..66824982e05f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -27,11 +27,20 @@ #include <linux/spinlock.h> #include <linux/string_choices.h> #include <asm/barrier.h> -#include <asm/dma-iommu.h> #include <dt-bindings/memory/mtk-memory-port.h> #include <dt-bindings/memory/mt2701-larb-port.h> #include <soc/mediatek/smi.h> +#if defined(CONFIG_ARM) +#include <asm/dma-iommu.h> +#else +#define arm_iommu_create_mapping(...) NULL +#define arm_iommu_attach_device(...) -ENODEV +struct dma_iommu_mapping { + struct iommu_domain *domain; +}; +#endif + #define REG_MMU_PT_BASE_ADDR 0x000 #define F_ALL_INVLD 0x2 @@ -446,22 +455,13 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct iommu_fwspec *fwspec = NULL; struct of_phandle_args iommu_spec; struct mtk_iommu_v1_data *data; int err, idx = 0, larbid, larbidx; struct device_link *link; struct device *larbdev; - /* - * In the deferred case, free the existed fwspec. - * Always initialize the fwspec internally. - */ - if (fwspec) { - iommu_fwspec_free(dev); - fwspec = dev_iommu_fwspec_get(dev); - } - while (!of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", idx, &iommu_spec)) { @@ -476,6 +476,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) idx++; } + if (!fwspec) + return ERR_PTR(-ENODEV); + data = dev_iommu_priv_get(dev); /* Link the consumer device with the smi-larb device(supplier) */ diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 97987cd78da9..6b989a62def2 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -116,6 +116,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np) int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { + bool dev_iommu_present; int err; if (!master_np) @@ -127,6 +128,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, mutex_unlock(&iommu_probe_device_lock); return 0; } + dev_iommu_present = dev->iommu; /* * We don't currently walk up the tree looking for a parent IOMMU. @@ -147,11 +149,18 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, err = of_iommu_configure_device(master_np, dev, id); } - if (err) + if (err && dev_iommu_present) iommu_fwspec_free(dev); + else if (err && dev->iommu) + dev_iommu_free(dev); mutex_unlock(&iommu_probe_device_lock); - if (!err && dev->bus) + /* + * If we're not on the iommu_probe_device() path (as indicated by the + * initial dev->iommu) then try to simulate it. This should no longer + * happen unless of_dma_configure() is being misused outside bus code. + */ + if (!err && dev->bus && !dev_iommu_present) err = iommu_probe_device(dev); if (err && err != -EPROBE_DEFER) diff --git a/drivers/iommu/riscv/Makefile b/drivers/iommu/riscv/Makefile index f54c9ed17d41..b5929f9f23e6 100644 --- a/drivers/iommu/riscv/Makefile +++ b/drivers/iommu/riscv/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_RISCV_IOMMU) += iommu.o iommu-platform.o +obj-y += iommu.o iommu-platform.o obj-$(CONFIG_RISCV_IOMMU_PCI) += iommu-pci.o diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c index 8f049d4a0e2c..bb57092ca901 100644 --- a/drivers/iommu/riscv/iommu.c +++ b/drivers/iommu/riscv/iommu.c @@ -48,14 +48,13 @@ static DEFINE_IDA(riscv_iommu_pscids); /* Device resource-managed allocations */ struct riscv_iommu_devres { void *addr; - int order; }; static void riscv_iommu_devres_pages_release(struct device *dev, void *res) { struct riscv_iommu_devres *devres = res; - iommu_free_pages(devres->addr, devres->order); + iommu_free_pages(devres->addr); } static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p) @@ -66,13 +65,14 @@ static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p return devres->addr == target->addr; } -static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, int order) +static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, + unsigned int size) { struct riscv_iommu_devres *devres; void *addr; - addr = iommu_alloc_pages_node(dev_to_node(iommu->dev), - GFP_KERNEL_ACCOUNT, order); + addr = iommu_alloc_pages_node_sz(dev_to_node(iommu->dev), + GFP_KERNEL_ACCOUNT, size); if (unlikely(!addr)) return NULL; @@ -80,12 +80,11 @@ static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu, int order) sizeof(struct riscv_iommu_devres), GFP_KERNEL); if (unlikely(!devres)) { - iommu_free_pages(addr, order); + iommu_free_pages(addr); return NULL; } devres->addr = addr; - devres->order = order; devres_add(iommu->dev, devres); @@ -163,9 +162,9 @@ static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu, } else { do { const size_t queue_size = entry_size << (logsz + 1); - const int order = get_order(queue_size); - queue->base = riscv_iommu_get_pages(iommu, order); + queue->base = riscv_iommu_get_pages( + iommu, max(queue_size, SZ_4K)); queue->phys = __pa(queue->base); } while (!queue->base && logsz-- > 0); } @@ -620,7 +619,7 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm break; } - ptr = riscv_iommu_get_pages(iommu, 0); + ptr = riscv_iommu_get_pages(iommu, SZ_4K); if (!ptr) return NULL; @@ -700,7 +699,7 @@ static int riscv_iommu_iodir_alloc(struct riscv_iommu_device *iommu) } if (!iommu->ddt_root) { - iommu->ddt_root = riscv_iommu_get_pages(iommu, 0); + iommu->ddt_root = riscv_iommu_get_pages(iommu, SZ_4K); iommu->ddt_phys = __pa(iommu->ddt_root); } @@ -1087,7 +1086,8 @@ static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain, #define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot)) static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain, - unsigned long pte, struct list_head *freelist) + unsigned long pte, + struct iommu_pages_list *freelist) { unsigned long *ptr; int i; @@ -1105,9 +1105,9 @@ static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain, } if (freelist) - list_add_tail(&virt_to_page(ptr)->lru, freelist); + iommu_pages_list_add(freelist, ptr); else - iommu_free_page(ptr); + iommu_free_pages(ptr); } static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain, @@ -1144,13 +1144,14 @@ pte_retry: * page table. This might race with other mappings, retry. */ if (_io_pte_none(pte)) { - addr = iommu_alloc_page_node(domain->numa_node, gfp); + addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp, + SZ_4K); if (!addr) return NULL; old = pte; pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE); if (cmpxchg_relaxed(ptr, old, pte) != old) { - iommu_free_page(addr); + iommu_free_pages(addr); goto pte_retry; } } @@ -1194,7 +1195,7 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, unsigned long *ptr; unsigned long pte, old, pte_prot; int rc = 0; - LIST_HEAD(freelist); + struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); if (!(prot & IOMMU_WRITE)) pte_prot = _PAGE_BASE | _PAGE_READ; @@ -1225,7 +1226,7 @@ static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain, *mapped = size; - if (!list_empty(&freelist)) { + if (!iommu_pages_list_empty(&freelist)) { /* * In 1.0 spec version, the smallest scope we can use to * invalidate all levels of page table (i.e. leaf and non-leaf) @@ -1385,8 +1386,8 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) domain->numa_node = dev_to_node(iommu->dev); domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD); domain->pgd_mode = pgd_mode; - domain->pgd_root = iommu_alloc_page_node(domain->numa_node, - GFP_KERNEL_ACCOUNT); + domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node, + GFP_KERNEL_ACCOUNT, SZ_4K); if (!domain->pgd_root) { kfree(domain); return ERR_PTR(-ENOMEM); @@ -1395,7 +1396,7 @@ static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev) domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1, RISCV_IOMMU_MAX_PSCID, GFP_KERNEL); if (domain->pscid < 0) { - iommu_free_page(domain->pgd_root); + iommu_free_pages(domain->pgd_root); kfree(domain); return ERR_PTR(-ENOMEM); } diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 323cc665c357..e6bb3c784017 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -88,6 +88,7 @@ struct rk_iommu_domain { dma_addr_t dt_dma; spinlock_t iommus_lock; /* lock for iommus list */ spinlock_t dt_lock; /* lock for modifying page directory table */ + struct device *dma_dev; struct iommu_domain domain; }; @@ -123,7 +124,6 @@ struct rk_iommudata { struct rk_iommu *iommu; }; -static struct device *dma_dev; static const struct rk_iommu_ops *rk_ops; static struct iommu_domain rk_identity_domain; @@ -132,7 +132,7 @@ static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, { size_t size = count * sizeof(u32); /* count of u32 entry */ - dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_device(dom->dma_dev, dma, size, DMA_TO_DEVICE); } static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) @@ -730,14 +730,15 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (rk_dte_is_pt_valid(dte)) goto done; - page_table = iommu_alloc_page(GFP_ATOMIC | rk_ops->gfp_flags); + page_table = iommu_alloc_pages_sz(GFP_ATOMIC | rk_ops->gfp_flags, + SPAGE_SIZE); if (!page_table) return ERR_PTR(-ENOMEM); - pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, pt_dma)) { - dev_err(dma_dev, "DMA mapping error while allocating page table\n"); - iommu_free_page(page_table); + pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) { + dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n"); + iommu_free_pages(page_table); return ERR_PTR(-ENOMEM); } @@ -1051,9 +1052,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) { struct rk_iommu_domain *rk_domain; - - if (!dma_dev) - return NULL; + struct rk_iommu *iommu; rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); if (!rk_domain) @@ -1064,14 +1063,17 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. * Allocate one 4 KiB page for each table. */ - rk_domain->dt = iommu_alloc_page(GFP_KERNEL | rk_ops->gfp_flags); + rk_domain->dt = iommu_alloc_pages_sz(GFP_KERNEL | rk_ops->gfp_flags, + SPAGE_SIZE); if (!rk_domain->dt) goto err_free_domain; - rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt, + iommu = rk_iommu_from_dev(dev); + rk_domain->dma_dev = iommu->dev; + rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev, rk_domain->dt, SPAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) { - dev_err(dma_dev, "DMA map error for DT\n"); + if (dma_mapping_error(rk_domain->dma_dev, rk_domain->dt_dma)) { + dev_err(rk_domain->dma_dev, "DMA map error for DT\n"); goto err_free_dt; } @@ -1086,7 +1088,7 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) return &rk_domain->domain; err_free_dt: - iommu_free_page(rk_domain->dt); + iommu_free_pages(rk_domain->dt); err_free_domain: kfree(rk_domain); @@ -1105,15 +1107,15 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) if (rk_dte_is_pt_valid(dte)) { phys_addr_t pt_phys = rk_ops->pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); - dma_unmap_single(dma_dev, pt_phys, + dma_unmap_single(rk_domain->dma_dev, pt_phys, SPAGE_SIZE, DMA_TO_DEVICE); - iommu_free_page(page_table); + iommu_free_pages(page_table); } } - dma_unmap_single(dma_dev, rk_domain->dt_dma, + dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma, SPAGE_SIZE, DMA_TO_DEVICE); - iommu_free_page(rk_domain->dt); + iommu_free_pages(rk_domain->dt); kfree(rk_domain); } @@ -1148,14 +1150,13 @@ static int rk_iommu_of_xlate(struct device *dev, struct platform_device *iommu_dev; struct rk_iommudata *data; - data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL); + iommu_dev = of_find_device_by_node(args->np); + + data = devm_kzalloc(&iommu_dev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - iommu_dev = of_find_device_by_node(args->np); - data->iommu = platform_get_drvdata(iommu_dev); - data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1193,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; + iommu->domain = &rk_identity_domain; + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; @@ -1256,22 +1259,6 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) return err; - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); - if (err) - goto err_unprepare_clocks; - - err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); - if (err) - goto err_remove_sysfs; - - /* - * Use the first registered IOMMU device for domain to use with DMA - * API, since a domain might not physically correspond to a single - * IOMMU device.. - */ - if (!dma_dev) - dma_dev = &pdev->dev; - pm_runtime_enable(dev); for (i = 0; i < iommu->num_irq; i++) { @@ -1290,12 +1277,19 @@ static int rk_iommu_probe(struct platform_device *pdev) dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); + if (err) + goto err_pm_disable; + + err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); + if (err) + goto err_remove_sysfs; + return 0; -err_pm_disable: - pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_unprepare_clocks: +err_pm_disable: + pm_runtime_disable(dev); clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); return err; } diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index fbdeded3d48b..433b59f43530 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -16,7 +16,7 @@ #include "dma-iommu.h" -static const struct iommu_ops s390_iommu_ops; +static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops; static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; @@ -31,10 +31,21 @@ struct s390_domain { unsigned long *dma_table; spinlock_t list_lock; struct rcu_head rcu; + u8 origin_type; }; static struct iommu_domain blocking_domain; +static inline unsigned int calc_rfx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_RF_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_rsx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_RS_SHIFT) & ZPCI_INDEX_MASK; +} + static inline unsigned int calc_rtx(dma_addr_t ptr) { return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; @@ -56,6 +67,20 @@ static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); } +static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= (rso & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RFX; +} + +static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= (rto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RSX; +} + static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) { *entry &= ZPCI_RTE_FLAG_MASK; @@ -70,6 +95,22 @@ static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) *entry |= ZPCI_TABLE_TYPE_SX; } +static inline void validate_rf_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RFX; +} + +static inline void validate_rs_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RSX; +} + static inline void validate_rt_entry(unsigned long *entry) { *entry &= ~ZPCI_TABLE_VALID_MASK; @@ -120,6 +161,22 @@ static inline int pt_entry_isvalid(unsigned long entry) return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; } +static inline unsigned long *get_rf_rso(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RFX) + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); + else + return NULL; +} + +static inline unsigned long *get_rs_rto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RSX) + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); + else + return NULL; +} + static inline unsigned long *get_rt_sto(unsigned long entry) { if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) @@ -191,18 +248,59 @@ static void dma_free_seg_table(unsigned long entry) dma_free_cpu_table(sto); } -static void dma_cleanup_tables(unsigned long *table) +static void dma_free_rt_table(unsigned long entry) { + unsigned long *rto = get_rs_rto(entry); int rtx; - if (!table) + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(rto[rtx])) + dma_free_seg_table(rto[rtx]); + + dma_free_cpu_table(rto); +} + +static void dma_free_rs_table(unsigned long entry) +{ + unsigned long *rso = get_rf_rso(entry); + int rsx; + + for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++) + if (reg_entry_isvalid(rso[rsx])) + dma_free_rt_table(rso[rsx]); + + dma_free_cpu_table(rso); +} + +static void dma_cleanup_tables(struct s390_domain *domain) +{ + int rtx, rsx, rfx; + + if (!domain->dma_table) return; - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) - if (reg_entry_isvalid(table[rtx])) - dma_free_seg_table(table[rtx]); + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RFX: + for (rfx = 0; rfx < ZPCI_TABLE_ENTRIES; rfx++) + if (reg_entry_isvalid(domain->dma_table[rfx])) + dma_free_rs_table(domain->dma_table[rfx]); + break; + case ZPCI_TABLE_TYPE_RSX: + for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++) + if (reg_entry_isvalid(domain->dma_table[rsx])) + dma_free_rt_table(domain->dma_table[rsx]); + break; + case ZPCI_TABLE_TYPE_RTX: + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(domain->dma_table[rtx])) + dma_free_seg_table(domain->dma_table[rtx]); + break; + default: + WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type); + return; + } - dma_free_cpu_table(table); + dma_free_cpu_table(domain->dma_table); } static unsigned long *dma_alloc_page_table(gfp_t gfp) @@ -218,6 +316,70 @@ static unsigned long *dma_alloc_page_table(gfp_t gfp) return table; } +static unsigned long *dma_walk_rs_table(unsigned long *rso, + dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned int rsx = calc_rsx(dma_addr); + unsigned long old_rse, rse; + unsigned long *rsep, *rto; + + rsep = &rso[rsx]; + rse = READ_ONCE(*rsep); + if (reg_entry_isvalid(rse)) { + rto = get_rs_rto(rse); + } else { + rto = dma_alloc_cpu_table(gfp); + if (!rto) + return NULL; + + set_rs_rto(&rse, virt_to_phys(rto)); + validate_rs_entry(&rse); + entry_clr_protected(&rse); + + old_rse = cmpxchg(rsep, ZPCI_TABLE_INVALID, rse); + if (old_rse != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(rto); + rto = get_rs_rto(old_rse); + } + } + return rto; +} + +static unsigned long *dma_walk_rf_table(unsigned long *rfo, + dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned int rfx = calc_rfx(dma_addr); + unsigned long old_rfe, rfe; + unsigned long *rfep, *rso; + + rfep = &rfo[rfx]; + rfe = READ_ONCE(*rfep); + if (reg_entry_isvalid(rfe)) { + rso = get_rf_rso(rfe); + } else { + rso = dma_alloc_cpu_table(gfp); + if (!rso) + return NULL; + + set_rf_rso(&rfe, virt_to_phys(rso)); + validate_rf_entry(&rfe); + entry_clr_protected(&rfe); + + old_rfe = cmpxchg(rfep, ZPCI_TABLE_INVALID, rfe); + if (old_rfe != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(rso); + rso = get_rf_rso(old_rfe); + } + } + + if (!rso) + return NULL; + + return dma_walk_rs_table(rso, dma_addr, gfp); +} + static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) { unsigned long old_rte, rte; @@ -271,11 +433,31 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) return pto; } -static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp) +static unsigned long *dma_walk_region_tables(struct s390_domain *domain, + dma_addr_t dma_addr, gfp_t gfp) { - unsigned long *sto, *pto; + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RFX: + return dma_walk_rf_table(domain->dma_table, dma_addr, gfp); + case ZPCI_TABLE_TYPE_RSX: + return dma_walk_rs_table(domain->dma_table, dma_addr, gfp); + case ZPCI_TABLE_TYPE_RTX: + return domain->dma_table; + default: + return NULL; + } +} + +static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain, + dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned long *rto, *sto, *pto; unsigned int rtx, sx, px; + rto = dma_walk_region_tables(domain, dma_addr, gfp); + if (!rto) + return NULL; + rtx = calc_rtx(dma_addr); sto = dma_get_seg_table_origin(&rto[rtx], gfp); if (!sto) @@ -329,9 +511,25 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) } } +static inline u64 max_tbl_size(struct s390_domain *domain) +{ + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RTX: + return ZPCI_TABLE_SIZE_RT - 1; + case ZPCI_TABLE_TYPE_RSX: + return ZPCI_TABLE_SIZE_RS - 1; + case ZPCI_TABLE_TYPE_RFX: + return U64_MAX; + default: + return 0; + } +} + static struct iommu_domain *s390_domain_alloc_paging(struct device *dev) { + struct zpci_dev *zdev = to_zpci_dev(dev); struct s390_domain *s390_domain; + u64 aperture_size; s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL); if (!s390_domain) @@ -342,9 +540,26 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev) kfree(s390_domain); return NULL; } + + aperture_size = min(s390_iommu_aperture, + zdev->end_dma - zdev->start_dma + 1); + if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) { + s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX; + } else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) && + (zdev->dtsm & ZPCI_IOTA_DT_RS)) { + s390_domain->origin_type = ZPCI_TABLE_TYPE_RSX; + } else if (zdev->dtsm & ZPCI_IOTA_DT_RF) { + s390_domain->origin_type = ZPCI_TABLE_TYPE_RFX; + } else { + /* Assume RTX available */ + s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX; + aperture_size = ZPCI_TABLE_SIZE_RT - zdev->start_dma; + } + zdev->end_dma = zdev->start_dma + aperture_size - 1; + s390_domain->domain.geometry.force_aperture = true; s390_domain->domain.geometry.aperture_start = 0; - s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1; + s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain); spin_lock_init(&s390_domain->list_lock); INIT_LIST_HEAD_RCU(&s390_domain->devices); @@ -356,7 +571,7 @@ static void s390_iommu_rcu_free_domain(struct rcu_head *head) { struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu); - dma_cleanup_tables(s390_domain->dma_table); + dma_cleanup_tables(s390_domain); kfree(s390_domain); } @@ -381,6 +596,61 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev, spin_unlock_irqrestore(&zdev->dom_lock, flags); } +static u64 get_iota_region_flag(struct s390_domain *domain) +{ + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RTX: + return ZPCI_IOTA_RTTO_FLAG; + case ZPCI_TABLE_TYPE_RSX: + return ZPCI_IOTA_RSTO_FLAG; + case ZPCI_TABLE_TYPE_RFX: + return ZPCI_IOTA_RFTO_FLAG; + default: + WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type); + return 0; + } +} + +static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev, + struct iommu_domain *domain, u8 *status) +{ + struct s390_domain *s390_domain; + int rc = 0; + u64 iota; + + switch (domain->type) { + case IOMMU_DOMAIN_IDENTITY: + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, + zdev->end_dma, 0, status); + break; + case IOMMU_DOMAIN_BLOCKED: + /* Nothing to do in this case */ + break; + default: + s390_domain = to_s390_domain(domain); + iota = virt_to_phys(s390_domain->dma_table) | + get_iota_region_flag(s390_domain); + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, + zdev->end_dma, iota, status); + } + + return rc; +} + +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&zdev->dom_lock, flags); + + rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status); + + spin_unlock_irqrestore(&zdev->dom_lock, flags); + + return rc; +} + static int blocking_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -392,9 +662,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain, return 0; s390_domain = to_s390_domain(zdev->s390_domain); - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_del_rcu(&zdev->iommu_list); - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + if (zdev->dma_table) { + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_del_rcu(&zdev->iommu_list); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + } zpci_unregister_ioat(zdev, 0); zdev->dma_table = NULL; @@ -422,8 +694,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, blocking_domain_attach_device(&blocking_domain, dev); /* If we fail now DMA remains blocked via blocking domain */ - cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(s390_domain->dma_table), &status); + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; zdev->dma_table = s390_domain->dma_table; @@ -441,6 +712,8 @@ static void s390_iommu_get_resv_regions(struct device *dev, { struct zpci_dev *zdev = to_zpci_dev(dev); struct iommu_resv_region *region; + u64 max_size, end_resv; + unsigned long flags; if (zdev->start_dma) { region = iommu_alloc_resv_region(0, zdev->start_dma, 0, @@ -450,10 +723,21 @@ static void s390_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, list); } - if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) { - region = iommu_alloc_resv_region(zdev->end_dma + 1, - ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1, - 0, IOMMU_RESV_RESERVED, GFP_KERNEL); + spin_lock_irqsave(&zdev->dom_lock, flags); + if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED || + zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY) { + spin_unlock_irqrestore(&zdev->dom_lock, flags); + return; + } + + max_size = max_tbl_size(to_s390_domain(zdev->s390_domain)); + spin_unlock_irqrestore(&zdev->dom_lock, flags); + + if (zdev->end_dma < max_size) { + end_resv = max_size - zdev->end_dma; + region = iommu_alloc_resv_region(zdev->end_dma + 1, end_resv, + 0, IOMMU_RESV_RESERVED, + GFP_KERNEL); if (!region) return; list_add_tail(®ion->list, list); @@ -469,13 +753,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) zdev = to_zpci_dev(dev); - if (zdev->start_dma > zdev->end_dma || - zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1) + if (zdev->start_dma > zdev->end_dma) return ERR_PTR(-EINVAL); - if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) - zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; - if (zdev->tlb_refresh) dev->iommu->shadow_on_flush = 1; @@ -565,8 +845,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, int rc; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, - gfp); + entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp); if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; @@ -581,8 +860,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, undo_cpu_trans: while (i-- > 0) { dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr, gfp); + entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp); if (!entry) break; dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); @@ -599,8 +877,7 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, int rc = 0; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, - GFP_ATOMIC); + entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC); if (unlikely(!entry)) { rc = -EINVAL; break; @@ -644,6 +921,51 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, return rc; } +static unsigned long *get_rso_from_iova(struct s390_domain *domain, + dma_addr_t iova) +{ + unsigned long *rfo; + unsigned long rfe; + unsigned int rfx; + + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RFX: + rfo = domain->dma_table; + rfx = calc_rfx(iova); + rfe = READ_ONCE(rfo[rfx]); + if (!reg_entry_isvalid(rfe)) + return NULL; + return get_rf_rso(rfe); + case ZPCI_TABLE_TYPE_RSX: + return domain->dma_table; + default: + return NULL; + } +} + +static unsigned long *get_rto_from_iova(struct s390_domain *domain, + dma_addr_t iova) +{ + unsigned long *rso; + unsigned long rse; + unsigned int rsx; + + switch (domain->origin_type) { + case ZPCI_TABLE_TYPE_RFX: + case ZPCI_TABLE_TYPE_RSX: + rso = get_rso_from_iova(domain, iova); + rsx = calc_rsx(iova); + rse = READ_ONCE(rso[rsx]); + if (!reg_entry_isvalid(rse)) + return NULL; + return get_rs_rto(rse); + case ZPCI_TABLE_TYPE_RTX: + return domain->dma_table; + default: + return NULL; + } +} + static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -657,10 +979,13 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, iova > domain->geometry.aperture_end) return 0; + rto = get_rto_from_iova(s390_domain, iova); + if (!rto) + return 0; + rtx = calc_rtx(iova); sx = calc_sx(iova); px = calc_px(iova); - rto = s390_domain->dma_table; rte = READ_ONCE(rto[rtx]); if (reg_entry_isvalid(rte)) { @@ -715,7 +1040,6 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) int zpci_init_iommu(struct zpci_dev *zdev) { - u64 aperture_size; int rc = 0; rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, @@ -723,16 +1047,16 @@ int zpci_init_iommu(struct zpci_dev *zdev) if (rc) goto out_err; - rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL); + if (zdev->rtr_avail) { + rc = iommu_device_register(&zdev->iommu_dev, + &s390_iommu_rtr_ops, NULL); + } else { + rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, + NULL); + } if (rc) goto out_sysfs; - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - aperture_size = min3(s390_iommu_aperture, - ZPCI_TABLE_SIZE_RT - zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); - zdev->end_dma = zdev->start_dma + aperture_size - 1; - return 0; out_sysfs: @@ -787,6 +1111,39 @@ static int __init s390_iommu_init(void) } subsys_initcall(s390_iommu_init); +static int s390_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + u8 status; + int cc; + + blocking_domain_attach_device(&blocking_domain, dev); + + /* If we fail now DMA remains blocked via blocking domain */ + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); + + /* + * If the device is undergoing error recovery the reset code + * will re-establish the new domain. + */ + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) + return -EIO; + + zdev_s390_domain_update(zdev, domain); + + return 0; +} + +static const struct iommu_domain_ops s390_identity_ops = { + .attach_dev = s390_attach_dev_identity, +}; + +static struct iommu_domain s390_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &s390_identity_ops, +}; + static struct iommu_domain blocking_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { @@ -794,23 +1151,31 @@ static struct iommu_domain blocking_domain = { } }; -static const struct iommu_ops s390_iommu_ops = { - .blocked_domain = &blocking_domain, - .release_domain = &blocking_domain, - .capable = s390_iommu_capable, - .domain_alloc_paging = s390_domain_alloc_paging, - .probe_device = s390_iommu_probe_device, - .device_group = generic_device_group, - .pgsize_bitmap = SZ_4K, - .get_resv_regions = s390_iommu_get_resv_regions, - .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = s390_iommu_attach_device, - .map_pages = s390_iommu_map_pages, - .unmap_pages = s390_iommu_unmap_pages, - .flush_iotlb_all = s390_iommu_flush_iotlb_all, - .iotlb_sync = s390_iommu_iotlb_sync, - .iotlb_sync_map = s390_iommu_iotlb_sync_map, - .iova_to_phys = s390_iommu_iova_to_phys, - .free = s390_domain_free, +#define S390_IOMMU_COMMON_OPS() \ + .blocked_domain = &blocking_domain, \ + .release_domain = &blocking_domain, \ + .capable = s390_iommu_capable, \ + .domain_alloc_paging = s390_domain_alloc_paging, \ + .probe_device = s390_iommu_probe_device, \ + .device_group = generic_device_group, \ + .pgsize_bitmap = SZ_4K, \ + .get_resv_regions = s390_iommu_get_resv_regions, \ + .default_domain_ops = &(const struct iommu_domain_ops) { \ + .attach_dev = s390_iommu_attach_device, \ + .map_pages = s390_iommu_map_pages, \ + .unmap_pages = s390_iommu_unmap_pages, \ + .flush_iotlb_all = s390_iommu_flush_iotlb_all, \ + .iotlb_sync = s390_iommu_iotlb_sync, \ + .iotlb_sync_map = s390_iommu_iotlb_sync_map, \ + .iova_to_phys = s390_iommu_iova_to_phys, \ + .free = s390_domain_free, \ } + +static const struct iommu_ops s390_iommu_ops = { + S390_IOMMU_COMMON_OPS() +}; + +static const struct iommu_ops s390_iommu_rtr_ops = { + .identity_domain = &s390_identity_domain, + S390_IOMMU_COMMON_OPS() }; diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 8d8f11854676..76c9620af4bb 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -690,8 +690,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev) if (!sun50i_domain) return NULL; - sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(DT_SIZE)); + sun50i_domain->dt = + iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, DT_SIZE); if (!sun50i_domain->dt) goto err_free_domain; @@ -713,7 +713,7 @@ static void sun50i_iommu_domain_free(struct iommu_domain *domain) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); - iommu_free_pages(sun50i_domain->dt, get_order(DT_SIZE)); + iommu_free_pages(sun50i_domain->dt); sun50i_domain->dt = NULL; kfree(sun50i_domain); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 7f633bb5efef..e58fe9d8b9e7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -51,14 +51,17 @@ struct tegra_smmu { struct iommu_device iommu; /* IOMMU Core code handle */ }; +struct tegra_pd; +struct tegra_pt; + struct tegra_smmu_as { struct iommu_domain domain; struct tegra_smmu *smmu; unsigned int use_count; spinlock_t lock; u32 *count; - struct page **pts; - struct page *pd; + struct tegra_pt **pts; + struct tegra_pd *pd; dma_addr_t pd_dma; unsigned id; u32 attr; @@ -155,6 +158,14 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) #define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \ SMMU_PDE_NONSECURE) +struct tegra_pd { + u32 val[SMMU_NUM_PDE]; +}; + +struct tegra_pt { + u32 val[SMMU_NUM_PTE]; +}; + static unsigned int iova_pd_index(unsigned long iova) { return (iova >> SMMU_PDE_SHIFT) & (SMMU_NUM_PDE - 1); @@ -284,7 +295,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->attr = SMMU_PD_READABLE | SMMU_PD_WRITABLE | SMMU_PD_NONSECURE; - as->pd = __iommu_alloc_pages(GFP_KERNEL | __GFP_DMA, 0); + as->pd = iommu_alloc_pages_sz(GFP_KERNEL | __GFP_DMA, SMMU_SIZE_PD); if (!as->pd) { kfree(as); return NULL; @@ -292,7 +303,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->count = kcalloc(SMMU_NUM_PDE, sizeof(u32), GFP_KERNEL); if (!as->count) { - __iommu_free_pages(as->pd, 0); + iommu_free_pages(as->pd); kfree(as); return NULL; } @@ -300,7 +311,7 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev) as->pts = kcalloc(SMMU_NUM_PDE, sizeof(*as->pts), GFP_KERNEL); if (!as->pts) { kfree(as->count); - __iommu_free_pages(as->pd, 0); + iommu_free_pages(as->pd); kfree(as); return NULL; } @@ -417,8 +428,8 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, goto unlock; } - as->pd_dma = dma_map_page(smmu->dev, as->pd, 0, SMMU_SIZE_PD, - DMA_TO_DEVICE); + as->pd_dma = + dma_map_single(smmu->dev, as->pd, SMMU_SIZE_PD, DMA_TO_DEVICE); if (dma_mapping_error(smmu->dev, as->pd_dma)) { err = -ENOMEM; goto unlock; @@ -450,7 +461,7 @@ static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, return 0; err_unmap: - dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); + dma_unmap_single(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); unlock: mutex_unlock(&smmu->lock); @@ -469,7 +480,7 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu, tegra_smmu_free_asid(smmu, as->id); - dma_unmap_page(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); + dma_unmap_single(smmu->dev, as->pd_dma, SMMU_SIZE_PD, DMA_TO_DEVICE); as->smmu = NULL; @@ -548,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - u32 *pd = page_address(as->pd); + u32 *pd = &as->pd->val[pd_index]; unsigned long offset = pd_index * sizeof(*pd); /* Set the page directory entry first */ - pd[pd_index] = value; + *pd = value; /* The flush the page directory entry from caches */ dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, @@ -564,11 +575,9 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, smmu_flush(smmu); } -static u32 *tegra_smmu_pte_offset(struct page *pt_page, unsigned long iova) +static u32 *tegra_smmu_pte_offset(struct tegra_pt *pt, unsigned long iova) { - u32 *pt = page_address(pt_page); - - return pt + iova_pt_index(iova); + return &pt->val[iova_pt_index(iova)]; } static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, @@ -576,21 +585,19 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - struct page *pt_page; - u32 *pd; + struct tegra_pt *pt; - pt_page = as->pts[pd_index]; - if (!pt_page) + pt = as->pts[pd_index]; + if (!pt) return NULL; - pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(smmu, pd[pd_index]); + *dmap = smmu_pde_to_dma(smmu, as->pd->val[pd_index]); - return tegra_smmu_pte_offset(pt_page, iova); + return tegra_smmu_pte_offset(pt, iova); } static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, - dma_addr_t *dmap, struct page *page) + dma_addr_t *dmap, struct tegra_pt *pt) { unsigned int pde = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; @@ -598,30 +605,28 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, if (!as->pts[pde]) { dma_addr_t dma; - dma = dma_map_page(smmu->dev, page, 0, SMMU_SIZE_PT, - DMA_TO_DEVICE); + dma = dma_map_single(smmu->dev, pt, SMMU_SIZE_PT, + DMA_TO_DEVICE); if (dma_mapping_error(smmu->dev, dma)) { - __iommu_free_pages(page, 0); + iommu_free_pages(pt); return NULL; } if (!smmu_dma_addr_valid(smmu, dma)) { - dma_unmap_page(smmu->dev, dma, SMMU_SIZE_PT, - DMA_TO_DEVICE); - __iommu_free_pages(page, 0); + dma_unmap_single(smmu->dev, dma, SMMU_SIZE_PT, + DMA_TO_DEVICE); + iommu_free_pages(pt); return NULL; } - as->pts[pde] = page; + as->pts[pde] = pt; tegra_smmu_set_pde(as, iova, SMMU_MK_PDE(dma, SMMU_PDE_ATTR | SMMU_PDE_NEXT)); *dmap = dma; } else { - u32 *pd = page_address(as->pd); - - *dmap = smmu_pde_to_dma(smmu, pd[pde]); + *dmap = smmu_pde_to_dma(smmu, as->pd->val[pde]); } return tegra_smmu_pte_offset(as->pts[pde], iova); @@ -637,7 +642,7 @@ static void tegra_smmu_pte_get_use(struct tegra_smmu_as *as, unsigned long iova) static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) { unsigned int pde = iova_pd_index(iova); - struct page *page = as->pts[pde]; + struct tegra_pt *pt = as->pts[pde]; /* * When no entries in this page table are used anymore, return the @@ -645,13 +650,13 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) */ if (--as->count[pde] == 0) { struct tegra_smmu *smmu = as->smmu; - u32 *pd = page_address(as->pd); - dma_addr_t pte_dma = smmu_pde_to_dma(smmu, pd[pde]); + dma_addr_t pte_dma = smmu_pde_to_dma(smmu, as->pd->val[pde]); tegra_smmu_set_pde(as, iova, 0); - dma_unmap_page(smmu->dev, pte_dma, SMMU_SIZE_PT, DMA_TO_DEVICE); - __iommu_free_pages(page, 0); + dma_unmap_single(smmu->dev, pte_dma, SMMU_SIZE_PT, + DMA_TO_DEVICE); + iommu_free_pages(pt); as->pts[pde] = NULL; } } @@ -671,16 +676,16 @@ static void tegra_smmu_set_pte(struct tegra_smmu_as *as, unsigned long iova, smmu_flush(smmu); } -static struct page *as_get_pde_page(struct tegra_smmu_as *as, - unsigned long iova, gfp_t gfp, - unsigned long *flags) +static struct tegra_pt *as_get_pde_page(struct tegra_smmu_as *as, + unsigned long iova, gfp_t gfp, + unsigned long *flags) { unsigned int pde = iova_pd_index(iova); - struct page *page = as->pts[pde]; + struct tegra_pt *pt = as->pts[pde]; /* at first check whether allocation needs to be done at all */ - if (page) - return page; + if (pt) + return pt; /* * In order to prevent exhaustion of the atomic memory pool, we @@ -690,7 +695,7 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as, if (gfpflags_allow_blocking(gfp)) spin_unlock_irqrestore(&as->lock, *flags); - page = __iommu_alloc_pages(gfp | __GFP_DMA, 0); + pt = iommu_alloc_pages_sz(gfp | __GFP_DMA, SMMU_SIZE_PT); if (gfpflags_allow_blocking(gfp)) spin_lock_irqsave(&as->lock, *flags); @@ -701,13 +706,13 @@ static struct page *as_get_pde_page(struct tegra_smmu_as *as, * if allocation succeeded and the allocation failure isn't fatal. */ if (as->pts[pde]) { - if (page) - __iommu_free_pages(page, 0); + if (pt) + iommu_free_pages(pt); - page = as->pts[pde]; + pt = as->pts[pde]; } - return page; + return pt; } static int @@ -717,15 +722,15 @@ __tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, { struct tegra_smmu_as *as = to_smmu_as(domain); dma_addr_t pte_dma; - struct page *page; + struct tegra_pt *pt; u32 pte_attrs; u32 *pte; - page = as_get_pde_page(as, iova, gfp, flags); - if (!page) + pt = as_get_pde_page(as, iova, gfp, flags); + if (!pt) return -ENOMEM; - pte = as_get_pte(as, iova, &pte_dma, page); + pte = as_get_pte(as, iova, &pte_dma, pt); if (!pte) return -ENOMEM; @@ -846,7 +851,6 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, err = ops->of_xlate(dev, args); if (err < 0) { dev_err(dev, "failed to parse SW group ID: %d\n", err); - iommu_fwspec_free(dev); return err; } diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index b85ce6310ddb..ecd41fb03e5a 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -48,6 +48,7 @@ struct viommu_dev { u64 pgsize_bitmap; u32 first_domain; u32 last_domain; + u32 identity_domain_id; /* Supported MAP flags */ u32 map_flags; u32 probe_size; @@ -62,7 +63,6 @@ struct viommu_mapping { struct viommu_domain { struct iommu_domain domain; struct viommu_dev *viommu; - struct mutex mutex; /* protects viommu pointer */ unsigned int id; u32 map_flags; @@ -70,7 +70,6 @@ struct viommu_domain { struct rb_root_cached mappings; unsigned long nr_endpoints; - bool bypass; }; struct viommu_endpoint { @@ -97,6 +96,8 @@ struct viommu_event { }; }; +static struct viommu_domain viommu_identity_domain; + #define to_viommu_domain(domain) \ container_of(domain, struct viommu_domain, domain) @@ -305,6 +306,22 @@ out_unlock: return ret; } +static int viommu_send_attach_req(struct viommu_dev *viommu, struct device *dev, + struct virtio_iommu_req_attach *req) +{ + int ret; + unsigned int i; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + for (i = 0; i < fwspec->num_ids; i++) { + req->endpoint = cpu_to_le32(fwspec->ids[i]); + ret = viommu_send_req_sync(viommu, req, sizeof(*req)); + if (ret) + return ret; + } + return 0; +} + /* * viommu_add_mapping - add a mapping to the internal tree * @@ -637,71 +654,45 @@ static void viommu_event_handler(struct virtqueue *vq) /* IOMMU API */ -static struct iommu_domain *viommu_domain_alloc(unsigned type) +static struct iommu_domain *viommu_domain_alloc_paging(struct device *dev) { + struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); + struct viommu_dev *viommu = vdev->viommu; + unsigned long viommu_page_size; struct viommu_domain *vdomain; - - if (type != IOMMU_DOMAIN_UNMANAGED && - type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_IDENTITY) - return NULL; - - vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL); - if (!vdomain) - return NULL; - - mutex_init(&vdomain->mutex); - spin_lock_init(&vdomain->mappings_lock); - vdomain->mappings = RB_ROOT_CACHED; - - return &vdomain->domain; -} - -static int viommu_domain_finalise(struct viommu_endpoint *vdev, - struct iommu_domain *domain) -{ int ret; - unsigned long viommu_page_size; - struct viommu_dev *viommu = vdev->viommu; - struct viommu_domain *vdomain = to_viommu_domain(domain); viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap); if (viommu_page_size > PAGE_SIZE) { dev_err(vdev->dev, "granule 0x%lx larger than system page size 0x%lx\n", viommu_page_size, PAGE_SIZE); - return -ENODEV; + return ERR_PTR(-ENODEV); } - ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, - viommu->last_domain, GFP_KERNEL); - if (ret < 0) - return ret; + vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL); + if (!vdomain) + return ERR_PTR(-ENOMEM); - vdomain->id = (unsigned int)ret; + spin_lock_init(&vdomain->mappings_lock); + vdomain->mappings = RB_ROOT_CACHED; - domain->pgsize_bitmap = viommu->pgsize_bitmap; - domain->geometry = viommu->geometry; + ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, + viommu->last_domain, GFP_KERNEL); + if (ret < 0) { + kfree(vdomain); + return ERR_PTR(ret); + } - vdomain->map_flags = viommu->map_flags; - vdomain->viommu = viommu; + vdomain->id = (unsigned int)ret; - if (domain->type == IOMMU_DOMAIN_IDENTITY) { - if (virtio_has_feature(viommu->vdev, - VIRTIO_IOMMU_F_BYPASS_CONFIG)) { - vdomain->bypass = true; - return 0; - } + vdomain->domain.pgsize_bitmap = viommu->pgsize_bitmap; + vdomain->domain.geometry = viommu->geometry; - ret = viommu_domain_map_identity(vdev, vdomain); - if (ret) { - ida_free(&viommu->domain_ids, vdomain->id); - vdomain->viommu = NULL; - return ret; - } - } + vdomain->map_flags = viommu->map_flags; + vdomain->viommu = viommu; - return 0; + return &vdomain->domain; } static void viommu_domain_free(struct iommu_domain *domain) @@ -717,29 +708,37 @@ static void viommu_domain_free(struct iommu_domain *domain) kfree(vdomain); } +static struct iommu_domain *viommu_domain_alloc_identity(struct device *dev) +{ + struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); + struct iommu_domain *domain; + int ret; + + if (virtio_has_feature(vdev->viommu->vdev, + VIRTIO_IOMMU_F_BYPASS_CONFIG)) + return &viommu_identity_domain.domain; + + domain = viommu_domain_alloc_paging(dev); + if (IS_ERR(domain)) + return domain; + + ret = viommu_domain_map_identity(vdev, to_viommu_domain(domain)); + if (ret) { + viommu_domain_free(domain); + return ERR_PTR(ret); + } + return domain; +} + static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - int i; int ret = 0; struct virtio_iommu_req_attach req; - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); struct viommu_domain *vdomain = to_viommu_domain(domain); - mutex_lock(&vdomain->mutex); - if (!vdomain->viommu) { - /* - * Properly initialize the domain now that we know which viommu - * owns it. - */ - ret = viommu_domain_finalise(vdev, domain); - } else if (vdomain->viommu != vdev->viommu) { - ret = -EINVAL; - } - mutex_unlock(&vdomain->mutex); - - if (ret) - return ret; + if (vdomain->viommu != vdev->viommu) + return -EINVAL; /* * In the virtio-iommu device, when attaching the endpoint to a new @@ -761,16 +760,9 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) .domain = cpu_to_le32(vdomain->id), }; - if (vdomain->bypass) - req.flags |= cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS); - - for (i = 0; i < fwspec->num_ids; i++) { - req.endpoint = cpu_to_le32(fwspec->ids[i]); - - ret = viommu_send_req_sync(vdomain->viommu, &req, sizeof(req)); - if (ret) - return ret; - } + ret = viommu_send_attach_req(vdomain->viommu, dev, &req); + if (ret) + return ret; if (!vdomain->nr_endpoints) { /* @@ -788,6 +780,40 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static int viommu_attach_identity_domain(struct iommu_domain *domain, + struct device *dev) +{ + int ret = 0; + struct virtio_iommu_req_attach req; + struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); + struct viommu_domain *vdomain = to_viommu_domain(domain); + + req = (struct virtio_iommu_req_attach) { + .head.type = VIRTIO_IOMMU_T_ATTACH, + .domain = cpu_to_le32(vdev->viommu->identity_domain_id), + .flags = cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS), + }; + + ret = viommu_send_attach_req(vdev->viommu, dev, &req); + if (ret) + return ret; + + if (vdev->vdomain) + vdev->vdomain->nr_endpoints--; + vdomain->nr_endpoints++; + vdev->vdomain = vdomain; + return 0; +} + +static struct viommu_domain viommu_identity_domain = { + .domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &(const struct iommu_domain_ops) { + .attach_dev = viommu_attach_identity_domain, + }, + }, +}; + static void viommu_detach_dev(struct viommu_endpoint *vdev) { int i; @@ -1062,7 +1088,8 @@ static bool viommu_capable(struct device *dev, enum iommu_cap cap) static struct iommu_ops viommu_ops = { .capable = viommu_capable, - .domain_alloc = viommu_domain_alloc, + .domain_alloc_identity = viommu_domain_alloc_identity, + .domain_alloc_paging = viommu_domain_alloc_paging, .probe_device = viommu_probe_device, .release_device = viommu_release_device, .device_group = viommu_device_group, @@ -1184,6 +1211,12 @@ static int viommu_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO)) viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO; + /* Reserve an ID to use as the bypass domain */ + if (virtio_has_feature(viommu->vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) { + viommu->identity_domain_id = viommu->first_domain; + viommu->first_domain++; + } + viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap; virtio_device_ready(vdev); |