summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-02 23:22:47 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-02 23:22:47 +0300
commitcd3eb7efaa995db00db0ba64893814f9831be842 (patch)
treee6c5d21c5ea43e796a6cc61dde7efcad078888d9
parent35e43538af8fd2cb39d58caca1134a87db173f75 (diff)
parent2b9d8e3e9a9bb693a8b8bd26ad192db037517759 (diff)
downloadlinux-cd3eb7efaa995db00db0ba64893814f9831be842.tar.xz
Merge tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: - SMMU Updates from Will Deacon: - SMMUv3: - Support stalling faults for platform devices - Decrease defaults sizes for the event and PRI queues - SMMUv2: - Support for a new '->probe_finalize' hook, needed by Nvidia - Even more Qualcomm compatible strings - Avoid Adreno TTBR1 quirk for DB820C platform - Intel VT-d updates from Lu Baolu: - Convert Intel IOMMU to use sva_lib helpers in iommu core - ftrace and debugfs supports for page fault handling - Support asynchronous nested capabilities - Various misc cleanups - Support for new VIOT ACPI table to make the VirtIO IOMMU available on x86 - Add the amd_iommu=force_enable command line option to enable the IOMMU on platforms where they are known to cause problems - Support for version 2 of the Rockchip IOMMU - Various smaller fixes, cleanups and refactorings * tag 'iommu-updates-v5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (66 commits) iommu/virtio: Enable x86 support iommu/dma: Pass address limit rather than size to iommu_setup_dma_ops() ACPI: Add driver for the VIOT table ACPI: Move IOMMU setup code out of IORT ACPI: arm64: Move DMA setup operations out of IORT iommu/vt-d: Fix dereference of pointer info before it is null checked iommu: Update "iommu.strict" documentation iommu/arm-smmu: Check smmu->impl pointer before dereferencing iommu/arm-smmu-v3: Remove unnecessary oom message iommu/arm-smmu: Fix arm_smmu_device refcount leak in address translation iommu/arm-smmu: Fix arm_smmu_device refcount leak when arm_smmu_rpm_get fails iommu/vt-d: Fix linker error on 32-bit iommu/vt-d: No need to typecast iommu/vt-d: Define counter explicitly as unsigned int iommu/vt-d: Remove unnecessary braces iommu/vt-d: Removed unused iommu_count in dmar domain iommu/vt-d: Use bitfields for DMAR capabilities iommu/vt-d: Use DEVICE_ATTR_RO macro iommu/vt-d: Fix out-bounds-warning in intel/svm.c iommu/vt-d: Add PRQ handling latency sampling ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt9
-rw-r--r--Documentation/devicetree/bindings/iommu/iommu.txt18
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.txt38
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml85
-rw-r--r--MAINTAINERS8
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi2
-rw-r--r--arch/arm64/mm/dma-mapping.c2
-rw-r--r--drivers/acpi/Kconfig3
-rw-r--r--drivers/acpi/Makefile2
-rw-r--r--drivers/acpi/arm64/Makefile1
-rw-r--r--drivers/acpi/arm64/dma.c50
-rw-r--r--drivers/acpi/arm64/iort.c132
-rw-r--r--drivers/acpi/bus.c2
-rw-r--r--drivers/acpi/scan.c78
-rw-r--r--drivers/acpi/viot.c366
-rw-r--r--drivers/iommu/Kconfig4
-rw-r--r--drivers/iommu/amd/amd_iommu.h2
-rw-r--r--drivers/iommu/amd/init.c20
-rw-r--r--drivers/iommu/amd/iommu.c33
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c59
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c223
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h48
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c43
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c39
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.h1
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c14
-rw-r--r--drivers/iommu/dma-iommu.c19
-rw-r--r--drivers/iommu/exynos-iommu.c1
-rw-r--r--drivers/iommu/intel/Kconfig6
-rw-r--r--drivers/iommu/intel/Makefile1
-rw-r--r--drivers/iommu/intel/debugfs.c111
-rw-r--r--drivers/iommu/intel/dmar.c54
-rw-r--r--drivers/iommu/intel/iommu.c172
-rw-r--r--drivers/iommu/intel/pasid.c2
-rw-r--r--drivers/iommu/intel/perf.c166
-rw-r--r--drivers/iommu/intel/perf.h73
-rw-r--r--drivers/iommu/intel/svm.c643
-rw-r--r--drivers/iommu/iommu.c3
-rw-r--r--drivers/iommu/iova.c18
-rw-r--r--drivers/iommu/ipmmu-vmsa.c1
-rw-r--r--drivers/iommu/msm_iommu.c1
-rw-r--r--drivers/iommu/mtk_iommu.c1
-rw-r--r--drivers/iommu/mtk_iommu_v1.c1
-rw-r--r--drivers/iommu/of_iommu.c68
-rw-r--r--drivers/iommu/omap-iommu.c1
-rw-r--r--drivers/iommu/rockchip-iommu.c175
-rw-r--r--drivers/iommu/virtio-iommu.c12
-rw-r--r--drivers/of/platform.c1
-rw-r--r--include/acpi/acpi_bus.h3
-rw-r--r--include/linux/acpi.h3
-rw-r--r--include/linux/acpi_iort.h14
-rw-r--r--include/linux/acpi_viot.h19
-rw-r--r--include/linux/dma-iommu.h4
-rw-r--r--include/linux/intel-iommu.h44
-rw-r--r--include/linux/of_iommu.h17
-rw-r--r--include/trace/events/intel_iommu.h37
56 files changed, 2172 insertions, 781 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 13f13fdd4731..0081d83457e8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -301,6 +301,9 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
+ force_enable - Force enable the IOMMU on platforms known
+ to be buggy with IOMMU enabled. Use this
+ option with care.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@@ -2031,7 +2034,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
- iommu.strict= [ARM64] Configure TLB invalidation behaviour
+ iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -2042,6 +2045,10 @@
1 - Strict mode (default).
DMA unmap operations invalidate IOMMU hardware TLBs
synchronously.
+ Note: on x86, the default behaviour depends on the
+ equivalent driver-specific parameters, but a strict
+ mode explicitly specified by either method takes
+ precedence.
iommu.passthrough=
[ARM64, X86] Configure DMA to bypass the IOMMU by default.
diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
index 3c36334e4f94..26ba9e530f13 100644
--- a/Documentation/devicetree/bindings/iommu/iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/iommu.txt
@@ -92,6 +92,24 @@ Optional properties:
tagging DMA transactions with an address space identifier. By default,
this is 0, which means that the device only has one address space.
+- dma-can-stall: When present, the master can wait for a transaction to
+ complete for an indefinite amount of time. Upon translation fault some
+ IOMMUs, instead of aborting the translation immediately, may first
+ notify the driver and keep the transaction in flight. This allows the OS
+ to inspect the fault and, for example, make physical pages resident
+ before updating the mappings and completing the transaction. Such IOMMU
+ accepts a limited number of simultaneous stalled transactions before
+ having to either put back-pressure on the master, or abort new faulting
+ transactions.
+
+ Firmware has to opt-in stalling, because most buses and masters don't
+ support it. In particular it isn't compatible with PCI, where
+ transactions have to complete before a time limit. More generally it
+ won't work in systems and masters that haven't been designed for
+ stalling. For example the OS, in order to handle a stalled transaction,
+ may attempt to retrieve pages from secondary storage in a stalled
+ domain, leading to a deadlock.
+
Notes:
======
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
deleted file mode 100644
index 6ecefea1c6f9..000000000000
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Rockchip IOMMU
-==============
-
-A Rockchip DRM iommu translates io virtual addresses to physical addresses for
-its master device. Each slave device is bound to a single master device, and
-shares its clocks, power domain and irq.
-
-Required properties:
-- compatible : Should be "rockchip,iommu"
-- reg : Address space for the configuration registers
-- interrupts : Interrupt specifier for the IOMMU instance
-- interrupt-names : Interrupt name for the IOMMU instance
-- #iommu-cells : Should be <0>. This indicates the iommu is a
- "single-master" device, and needs no additional information
- to associate with its master device. See:
- Documentation/devicetree/bindings/iommu/iommu.txt
-- clocks : A list of clocks required for the IOMMU to be accessible by
- the host CPU.
-- clock-names : Should contain the following:
- "iface" - Main peripheral bus clock (PCLK/HCL) (required)
- "aclk" - AXI bus clock (required)
-
-Optional properties:
-- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
- Some mmu instances may produce unexpected results
- when the reset operation is used.
-
-Example:
-
- vopl_mmu: iommu@ff940300 {
- compatible = "rockchip,iommu";
- reg = <0xff940300 0x100>;
- interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "vopl_mmu";
- clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
- clock-names = "aclk", "iface";
- #iommu-cells = <0>;
- };
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
new file mode 100644
index 000000000000..d2e28a9e3545
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip IOMMU
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+description: |+
+ A Rockchip DRM iommu translates io virtual addresses to physical addresses for
+ its master device. Each slave device is bound to a single master device and
+ shares its clocks, power domain and irq.
+
+ For information on assigning IOMMU controller to its peripheral devices,
+ see generic IOMMU bindings.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,iommu
+ - rockchip,rk3568-iommu
+
+ reg:
+ items:
+ - description: configuration registers for MMU instance 0
+ - description: configuration registers for MMU instance 1
+ minItems: 1
+ maxItems: 2
+
+ interrupts:
+ items:
+ - description: interruption for MMU instance 0
+ - description: interruption for MMU instance 1
+ minItems: 1
+ maxItems: 2
+
+ clocks:
+ items:
+ - description: Core clock
+ - description: Interface clock
+
+ clock-names:
+ items:
+ - const: aclk
+ - const: iface
+
+ "#iommu-cells":
+ const: 0
+
+ power-domains:
+ maxItems: 1
+
+ rockchip,disable-mmu-reset:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description: |
+ Do not use the mmu reset operation.
+ Some mmu instances may produce unexpected results
+ when the reset operation is used.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3399-cru.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ vopl_mmu: iommu@ff940300 {
+ compatible = "rockchip,iommu";
+ reg = <0xff940300 0x100>;
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+ clock-names = "aclk", "iface";
+ #iommu-cells = <0>;
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index 0d9d8fbd9e92..4d42bc8c9b8b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -431,6 +431,14 @@ W: https://01.org/linux-acpi
B: https://bugzilla.kernel.org
F: drivers/acpi/acpi_video.c
+ACPI VIOT DRIVER
+M: Jean-Philippe Brucker <jean-philippe@linaro.org>
+L: linux-acpi@vger.kernel.org
+L: iommu@lists.linux-foundation.org
+S: Maintained
+F: drivers/acpi/viot.c
+F: include/linux/acpi_viot.h
+
ACPI WMI DRIVER
L: platform-driver-x86@vger.kernel.org
S: Orphan
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index ce430ba9c118..5d06216c5c1c 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -1136,7 +1136,7 @@
};
adreno_smmu: iommu@b40000 {
- compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+ compatible = "qcom,msm8996-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2";
reg = <0x00b40000 0x10000>;
#global-interrupts = <1>;
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4bf1dd3eb041..6719f9efea09 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->dma_coherent = coherent;
if (iommu)
- iommu_setup_dma_ops(dev, dma_base, size);
+ iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
#ifdef CONFIG_XEN
if (xen_swiotlb_detect())
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 3972de7b7565..fe0bb6277e4d 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -526,6 +526,9 @@ endif
source "drivers/acpi/pmic/Kconfig"
+config ACPI_VIOT
+ bool
+
endif # ACPI
config X86_PM_TIMER
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index ceb1aed4b1fc..3018714e87d9 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -124,3 +124,5 @@ video-objs += acpi_video.o video_detect.o
obj-y += dptf/
obj-$(CONFIG_ARM64) += arm64/
+
+obj-$(CONFIG_ACPI_VIOT) += viot.o
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index 6ff50f4ed947..66acbe77f46e 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ACPI_IORT) += iort.o
obj-$(CONFIG_ACPI_GTDT) += gtdt.o
+obj-y += dma.o
diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c
new file mode 100644
index 000000000000..f16739ad3cc0
--- /dev/null
+++ b/drivers/acpi/arm64/dma.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include <linux/acpi_iort.h>
+#include <linux/device.h>
+#include <linux/dma-direct.h>
+
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+{
+ int ret;
+ u64 end, mask;
+ u64 dmaaddr = 0, size = 0, offset = 0;
+
+ /*
+ * If @dev is expected to be DMA-capable then the bus code that created
+ * it should have initialised its dma_mask pointer by this point. For
+ * now, we'll continue the legacy behaviour of coercing it to the
+ * coherent mask if not, but we'll no longer do so quietly.
+ */
+ if (!dev->dma_mask) {
+ dev_warn(dev, "DMA mask not set\n");
+ dev->dma_mask = &dev->coherent_dma_mask;
+ }
+
+ if (dev->coherent_dma_mask)
+ size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+ else
+ size = 1ULL << 32;
+
+ ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
+ if (ret == -ENODEV)
+ ret = iort_dma_get_ranges(dev, &size);
+ if (!ret) {
+ /*
+ * Limit coherent and dma mask based on size retrieved from
+ * firmware.
+ */
+ end = dmaaddr + size - 1;
+ mask = DMA_BIT_MASK(ilog2(end) + 1);
+ dev->bus_dma_limit = end;
+ dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
+ *dev->dma_mask = min(*dev->dma_mask, mask);
+ }
+
+ *dma_addr = dmaaddr;
+ *dma_size = size;
+
+ ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
+
+ dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+}
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index e34937e11186..3b23fb775ac4 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
return NULL;
}
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
-{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
-
- return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
-}
-
-static inline int iort_add_device_replay(struct device *dev)
-{
- int err = 0;
-
- if (dev->bus && !device_iommu_mapped(dev))
- err = iommu_probe_device(dev);
-
- return err;
-}
-
/**
* iort_iommu_msi_get_resv_regions - Reserved region driver helper
* @dev: Device from iommu_get_resv_regions()
@@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type)
}
}
-static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
- struct fwnode_handle *fwnode,
- const struct iommu_ops *ops)
-{
- int ret = iommu_fwspec_init(dev, fwnode, ops);
-
- if (!ret)
- ret = iommu_fwspec_add_ids(dev, &streamid, 1);
-
- return ret;
-}
-
static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
{
struct acpi_iort_root_complex *pci_rc;
@@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
return iort_iommu_driver_enabled(node->type) ?
-EPROBE_DEFER : -ENODEV;
- return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
+ return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops);
}
struct iort_pci_alias_info {
@@ -968,13 +939,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
static void iort_named_component_init(struct device *dev,
struct acpi_iort_node *node)
{
- struct property_entry props[2] = {};
+ struct property_entry props[3] = {};
struct acpi_iort_named_component *nc;
nc = (struct acpi_iort_named_component *)node->node_data;
props[0] = PROPERTY_ENTRY_U32("pasid-num-bits",
FIELD_GET(ACPI_IORT_NC_PASID_BITS,
nc->node_flags));
+ if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED)
+ props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall");
if (device_create_managed_software_node(dev, props, NULL))
dev_warn(dev, "Could not add device properties\n");
@@ -1020,24 +993,13 @@ static int iort_nc_iommu_map_id(struct device *dev,
* @dev: device to configure
* @id_in: optional input id const value pointer
*
- * Returns: iommu_ops pointer on configuration success
- * NULL on configuration failure
+ * Returns: 0 on success, <0 on failure
*/
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
- const u32 *id_in)
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
{
struct acpi_iort_node *node;
- const struct iommu_ops *ops;
int err = -ENODEV;
- /*
- * If we already translated the fwspec there
- * is nothing left to do, return the iommu_ops.
- */
- ops = iort_fwspec_iommu_ops(dev);
- if (ops)
- return ops;
-
if (dev_is_pci(dev)) {
struct iommu_fwspec *fwspec;
struct pci_bus *bus = to_pci_dev(dev)->bus;
@@ -1046,7 +1008,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
iort_match_node_callback, &bus->dev);
if (!node)
- return NULL;
+ return -ENODEV;
info.node = node;
err = pci_for_each_dma_alias(to_pci_dev(dev),
@@ -1059,7 +1021,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
iort_match_node_callback, dev);
if (!node)
- return NULL;
+ return -ENODEV;
err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) :
iort_nc_iommu_map(dev, node);
@@ -1068,32 +1030,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
iort_named_component_init(dev, node);
}
- /*
- * If we have reason to believe the IOMMU driver missed the initial
- * add_device callback for dev, replay it to get things in order.
- */
- if (!err) {
- ops = iort_fwspec_iommu_ops(dev);
- err = iort_add_device_replay(dev);
- }
-
- /* Ignore all other errors apart from EPROBE_DEFER */
- if (err == -EPROBE_DEFER) {
- ops = ERR_PTR(err);
- } else if (err) {
- dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
- ops = NULL;
- }
-
- return ops;
+ return err;
}
#else
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
- const u32 *input_id)
-{ return NULL; }
+int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
+{ return -ENODEV; }
#endif
static int nc_dma_get_range(struct device *dev, u64 *size)
@@ -1144,56 +1088,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
}
/**
- * iort_dma_setup() - Set-up device DMA parameters.
+ * iort_dma_get_ranges() - Look up DMA addressing limit for the device
+ * @dev: device to lookup
+ * @size: DMA range size result pointer
*
- * @dev: device to configure
- * @dma_addr: device DMA address result pointer
- * @dma_size: DMA range size result pointer
+ * Return: 0 on success, an error otherwise.
*/
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
+int iort_dma_get_ranges(struct device *dev, u64 *size)
{
- u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
- int ret;
-
- /*
- * If @dev is expected to be DMA-capable then the bus code that created
- * it should have initialised its dma_mask pointer by this point. For
- * now, we'll continue the legacy behaviour of coercing it to the
- * coherent mask if not, but we'll no longer do so quietly.
- */
- if (!dev->dma_mask) {
- dev_warn(dev, "DMA mask not set\n");
- dev->dma_mask = &dev->coherent_dma_mask;
- }
-
- if (dev->coherent_dma_mask)
- size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+ if (dev_is_pci(dev))
+ return rc_dma_get_range(dev, size);
else
- size = 1ULL << 32;
-
- ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
- if (ret == -ENODEV)
- ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size)
- : nc_dma_get_range(dev, &size);
-
- if (!ret) {
- /*
- * Limit coherent and dma mask based on size retrieved from
- * firmware.
- */
- end = dmaaddr + size - 1;
- mask = DMA_BIT_MASK(ilog2(end) + 1);
- dev->bus_dma_limit = end;
- dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
- *dev->dma_mask = min(*dev->dma_mask, mask);
- }
-
- *dma_addr = dmaaddr;
- *dma_size = size;
-
- ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
-
- dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
+ return nc_dma_get_range(dev, size);
}
static void __init acpi_iort_register_irq(int hwirq, const char *name,
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 60fb6a843853..ee24246d88fd 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -27,6 +27,7 @@
#include <linux/dmi.h>
#endif
#include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
#include <linux/suspend.h>
@@ -1335,6 +1336,7 @@ static int __init acpi_init(void)
acpi_wakeup_device_init();
acpi_debugger_init();
acpi_setup_sb_notify_handler();
+ acpi_viot_init();
return 0;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 0641bc20b097..b24513ec3fae 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -11,6 +11,8 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
+#include <linux/acpi_viot.h>
+#include <linux/iommu.h>
#include <linux/signal.h>
#include <linux/kthread.h>
#include <linux/dmi.h>
@@ -1526,6 +1528,78 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
return ret >= 0 ? 0 : ret;
}
+#ifdef CONFIG_IOMMU_API
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+ struct fwnode_handle *fwnode,
+ const struct iommu_ops *ops)
+{
+ int ret = iommu_fwspec_init(dev, fwnode, ops);
+
+ if (!ret)
+ ret = iommu_fwspec_add_ids(dev, &id, 1);
+
+ return ret;
+}
+
+static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+
+ return fwspec ? fwspec->ops : NULL;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+ const u32 *id_in)
+{
+ int err;
+ const struct iommu_ops *ops;
+
+ /*
+ * If we already translated the fwspec there is nothing left to do,
+ * return the iommu_ops.
+ */
+ ops = acpi_iommu_fwspec_ops(dev);
+ if (ops)
+ return ops;
+
+ err = iort_iommu_configure_id(dev, id_in);
+ if (err && err != -EPROBE_DEFER)
+ err = viot_iommu_configure(dev);
+
+ /*
+ * If we have reason to believe the IOMMU driver missed the initial
+ * iommu_probe_device() call for dev, replay it to get things in order.
+ */
+ if (!err && dev->bus && !device_iommu_mapped(dev))
+ err = iommu_probe_device(dev);
+
+ /* Ignore all other errors apart from EPROBE_DEFER */
+ if (err == -EPROBE_DEFER) {
+ return ERR_PTR(err);
+ } else if (err) {
+ dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
+ return NULL;
+ }
+ return acpi_iommu_fwspec_ops(dev);
+}
+
+#else /* !CONFIG_IOMMU_API */
+
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+ struct fwnode_handle *fwnode,
+ const struct iommu_ops *ops)
+{
+ return -ENODEV;
+}
+
+static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
+ const u32 *id_in)
+{
+ return NULL;
+}
+
+#endif /* !CONFIG_IOMMU_API */
+
/**
* acpi_dma_configure_id - Set-up DMA configuration for the device.
* @dev: The pointer to the device
@@ -1543,9 +1617,9 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
return 0;
}
- iort_dma_setup(dev, &dma_addr, &size);
+ acpi_arch_dma_setup(dev, &dma_addr, &size);
- iommu = iort_iommu_configure_id(dev, input_id);
+ iommu = acpi_iommu_configure_id(dev, input_id);
if (PTR_ERR(iommu) == -EPROBE_DEFER)
return -EPROBE_DEFER;
diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c
new file mode 100644
index 000000000000..d2256326c73a
--- /dev/null
+++ b/drivers/acpi/viot.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Virtual I/O topology
+ *
+ * The Virtual I/O Translation Table (VIOT) describes the topology of
+ * para-virtual IOMMUs and the endpoints they manage. The OS uses it to
+ * initialize devices in the right order, preventing endpoints from issuing DMA
+ * before their IOMMU is ready.
+ *
+ * When binding a driver to a device, before calling the device driver's probe()
+ * method, the driver infrastructure calls dma_configure(). At that point the
+ * VIOT driver looks for an IOMMU associated to the device in the VIOT table.
+ * If an IOMMU exists and has been initialized, the VIOT driver initializes the
+ * device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU
+ * ops when the device driver configures DMA mappings. If an IOMMU exists and
+ * hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing
+ * the device until the IOMMU is available.
+ */
+#define pr_fmt(fmt) "ACPI: VIOT: " fmt
+
+#include <linux/acpi_viot.h>
+#include <linux/dma-iommu.h>
+#include <linux/fwnode.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+struct viot_iommu {
+ /* Node offset within the table */
+ unsigned int offset;
+ struct fwnode_handle *fwnode;
+ struct list_head list;
+};
+
+struct viot_endpoint {
+ union {
+ /* PCI range */
+ struct {
+ u16 segment_start;
+ u16 segment_end;
+ u16 bdf_start;
+ u16 bdf_end;
+ };
+ /* MMIO */
+ u64 address;
+ };
+ u32 endpoint_id;
+ struct viot_iommu *viommu;
+ struct list_head list;
+};
+
+static struct acpi_table_viot *viot;
+static LIST_HEAD(viot_iommus);
+static LIST_HEAD(viot_pci_ranges);
+static LIST_HEAD(viot_mmio_endpoints);
+
+static int __init viot_check_bounds(const struct acpi_viot_header *hdr)
+{
+ struct acpi_viot_header *start, *end, *hdr_end;
+
+ start = ACPI_ADD_PTR(struct acpi_viot_header, viot,
+ max_t(size_t, sizeof(*viot), viot->node_offset));
+ end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length);
+ hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr));
+
+ if (hdr < start || hdr_end > end) {
+ pr_err(FW_BUG "Node pointer overflows\n");
+ return -EOVERFLOW;
+ }
+ if (hdr->length < sizeof(*hdr)) {
+ pr_err(FW_BUG "Empty node\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu,
+ u16 segment, u16 bdf)
+{
+ struct pci_dev *pdev;
+ struct fwnode_handle *fwnode;
+
+ pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf),
+ bdf & 0xff);
+ if (!pdev) {
+ pr_err("Could not find PCI IOMMU\n");
+ return -ENODEV;
+ }
+
+ fwnode = pdev->dev.fwnode;
+ if (!fwnode) {
+ /*
+ * PCI devices aren't necessarily described by ACPI. Create a
+ * fwnode so the IOMMU subsystem can identify this device.
+ */
+ fwnode = acpi_alloc_fwnode_static();
+ if (!fwnode) {
+ pci_dev_put(pdev);
+ return -ENOMEM;
+ }
+ set_primary_fwnode(&pdev->dev, fwnode);
+ }
+ viommu->fwnode = pdev->dev.fwnode;
+ pci_dev_put(pdev);
+ return 0;
+}
+
+static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu,
+ u64 address)
+{
+ struct acpi_device *adev;
+ struct resource res = {
+ .start = address,
+ .end = address,
+ .flags = IORESOURCE_MEM,
+ };
+
+ adev = acpi_resource_consumer(&res);
+ if (!adev) {
+ pr_err("Could not find MMIO IOMMU\n");
+ return -EINVAL;
+ }
+ viommu->fwnode = &adev->fwnode;
+ return 0;
+}
+
+static struct viot_iommu * __init viot_get_iommu(unsigned int offset)
+{
+ int ret;
+ struct viot_iommu *viommu;
+ struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header,
+ viot, offset);
+ union {
+ struct acpi_viot_virtio_iommu_pci pci;
+ struct acpi_viot_virtio_iommu_mmio mmio;
+ } *node = (void *)hdr;
+
+ list_for_each_entry(viommu, &viot_iommus, list)
+ if (viommu->offset == offset)
+ return viommu;
+
+ if (viot_check_bounds(hdr))
+ return NULL;
+
+ viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
+ if (!viommu)
+ return NULL;
+
+ viommu->offset = offset;
+ switch (hdr->type) {
+ case ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI:
+ if (hdr->length < sizeof(node->pci))
+ goto err_free;
+
+ ret = viot_get_pci_iommu_fwnode(viommu, node->pci.segment,
+ node->pci.bdf);
+ break;
+ case ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO:
+ if (hdr->length < sizeof(node->mmio))
+ goto err_free;
+
+ ret = viot_get_mmio_iommu_fwnode(viommu,
+ node->mmio.base_address);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ if (ret)
+ goto err_free;
+
+ list_add(&viommu->list, &viot_iommus);
+ return viommu;
+
+err_free:
+ kfree(viommu);
+ return NULL;
+}
+
+static int __init viot_parse_node(const struct acpi_viot_header *hdr)
+{
+ int ret = -EINVAL;
+ struct list_head *list;
+ struct viot_endpoint *ep;
+ union {
+ struct acpi_viot_mmio mmio;
+ struct acpi_viot_pci_range pci;
+ } *node = (void *)hdr;
+
+ if (viot_check_bounds(hdr))
+ return -EINVAL;
+
+ if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI ||
+ hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO)
+ return 0;
+
+ ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+ return -ENOMEM;
+
+ switch (hdr->type) {
+ case ACPI_VIOT_NODE_PCI_RANGE:
+ if (hdr->length < sizeof(node->pci)) {
+ pr_err(FW_BUG "Invalid PCI node size\n");
+ goto err_free;
+ }
+
+ ep->segment_start = node->pci.segment_start;
+ ep->segment_end = node->pci.segment_end;
+ ep->bdf_start = node->pci.bdf_start;
+ ep->bdf_end = node->pci.bdf_end;
+ ep->endpoint_id = node->pci.endpoint_start;
+ ep->viommu = viot_get_iommu(node->pci.output_node);
+ list = &viot_pci_ranges;
+ break;
+ case ACPI_VIOT_NODE_MMIO:
+ if (hdr->length < sizeof(node->mmio)) {
+ pr_err(FW_BUG "Invalid MMIO node size\n");
+ goto err_free;
+ }
+
+ ep->address = node->mmio.base_address;
+ ep->endpoint_id = node->mmio.endpoint;
+ ep->viommu = viot_get_iommu(node->mmio.output_node);
+ list = &viot_mmio_endpoints;
+ break;
+ default:
+ pr_warn("Unsupported node %x\n", hdr->type);
+ ret = 0;
+ goto err_free;
+ }
+
+ if (!ep->viommu) {
+ pr_warn("No IOMMU node found\n");
+ /*
+ * A future version of the table may use the node for other
+ * purposes. Keep parsing.
+ */
+ ret = 0;
+ goto err_free;
+ }
+
+ list_add(&ep->list, list);
+ return 0;
+
+err_free:
+ kfree(ep);
+ return ret;
+}
+
+/**
+ * acpi_viot_init - Parse the VIOT table
+ *
+ * Parse the VIOT table, prepare the list of endpoints to be used during DMA
+ * setup of devices.
+ */
+void __init acpi_viot_init(void)
+{
+ int i;
+ acpi_status status;
+ struct acpi_table_header *hdr;
+ struct acpi_viot_header *node;
+
+ status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr);
+ if (ACPI_FAILURE(status)) {
+ if (status != AE_NOT_FOUND) {
+ const char *msg = acpi_format_exception(status);
+
+ pr_err("Failed to get table, %s\n", msg);
+ }
+ return;
+ }
+
+ viot = (void *)hdr;
+
+ node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset);
+ for (i = 0; i < viot->node_count; i++) {
+ if (viot_parse_node(node))
+ return;
+
+ node = ACPI_ADD_PTR(struct acpi_viot_header, node,
+ node->length);
+ }
+
+ acpi_put_table(hdr);
+}
+
+static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu,
+ u32 epid)
+{
+ const struct iommu_ops *ops;
+
+ if (!viommu)
+ return -ENODEV;
+
+ /* We're not translating ourself */
+ if (viommu->fwnode == dev->fwnode)
+ return -EINVAL;
+
+ ops = iommu_ops_from_fwnode(viommu->fwnode);
+ if (!ops)
+ return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ?
+ -EPROBE_DEFER : -ENODEV;
+
+ return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops);
+}
+
+static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data)
+{
+ u32 epid;
+ struct viot_endpoint *ep;
+ u32 domain_nr = pci_domain_nr(pdev->bus);
+
+ list_for_each_entry(ep, &viot_pci_ranges, list) {
+ if (domain_nr >= ep->segment_start &&
+ domain_nr <= ep->segment_end &&
+ dev_id >= ep->bdf_start &&
+ dev_id <= ep->bdf_end) {
+ epid = ((domain_nr - ep->segment_start) << 16) +
+ dev_id - ep->bdf_start + ep->endpoint_id;
+
+ /*
+ * If we found a PCI range managed by the viommu, we're
+ * the one that has to request ACS.
+ */
+ pci_request_acs();
+
+ return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+ epid);
+ }
+ }
+ return -ENODEV;
+}
+
+static int viot_mmio_dev_iommu_init(struct platform_device *pdev)
+{
+ struct resource *mem;
+ struct viot_endpoint *ep;
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!mem)
+ return -ENODEV;
+
+ list_for_each_entry(ep, &viot_mmio_endpoints, list) {
+ if (ep->address == mem->start)
+ return viot_dev_iommu_init(&pdev->dev, ep->viommu,
+ ep->endpoint_id);
+ }
+ return -ENODEV;
+}
+
+/**
+ * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT
+ * @dev: the endpoint
+ *
+ * Return: 0 on success, <0 on failure
+ */
+int viot_iommu_configure(struct device *dev)
+{
+ if (dev_is_pci(dev))
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ viot_pci_dev_iommu_init, NULL);
+ else if (dev_is_platform(dev))
+ return viot_mmio_dev_iommu_init(to_platform_device(dev));
+ return -ENODEV;
+}
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1f111b399bca..07b7c25cbed8 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -400,9 +400,11 @@ config HYPERV_IOMMU
config VIRTIO_IOMMU
tristate "Virtio IOMMU driver"
depends on VIRTIO
- depends on ARM64
+ depends on (ARM64 || X86)
select IOMMU_API
+ select IOMMU_DMA
select INTERVAL_TREE
+ select ACPI_VIOT if ACPI
help
Para-virtualised IOMMU driver with virtio.
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 55dd38d814d9..416815a525d6 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -11,8 +11,6 @@
#include "amd_iommu_types.h"
-extern int amd_iommu_init_dma_ops(void);
-extern int amd_iommu_init_passthrough(void);
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(u16 devid);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index d006724f4dc2..46280e6e1535 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -153,7 +153,8 @@ int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static bool amd_iommu_detected;
-static bool __initdata amd_iommu_disabled;
+static bool amd_iommu_disabled __initdata;
+static bool amd_iommu_force_enable __initdata;
static int amd_iommu_target_ivhd_type;
u16 amd_iommu_last_bdf; /* largest PCI device id we have
@@ -231,7 +232,6 @@ enum iommu_init_state {
IOMMU_ENABLED,
IOMMU_PCI_INIT,
IOMMU_INTERRUPTS_EN,
- IOMMU_DMA_OPS,
IOMMU_INITIALIZED,
IOMMU_NOT_FOUND,
IOMMU_INIT_ERROR,
@@ -1908,8 +1908,8 @@ static void print_iommu_info(void)
pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pci_info(pdev, "Extended features (%#llx):",
- iommu->features);
+ pr_info("Extended features (%#llx):", iommu->features);
+
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
pr_cont(" %s", feat_str[i]);
@@ -2817,7 +2817,7 @@ out:
return ret;
}
-static bool detect_ivrs(void)
+static bool __init detect_ivrs(void)
{
struct acpi_table_header *ivrs_base;
acpi_status status;
@@ -2834,6 +2834,9 @@ static bool detect_ivrs(void)
acpi_put_table(ivrs_base);
+ if (amd_iommu_force_enable)
+ goto out;
+
/* Don't use IOMMU if there is Stoney Ridge graphics */
for (i = 0; i < 32; i++) {
u32 pci_id;
@@ -2845,6 +2848,7 @@ static bool detect_ivrs(void)
}
}
+out:
/* Make sure ACS will be enabled during PCI probe */
pci_request_acs();
@@ -2895,10 +2899,6 @@ static int __init state_next(void)
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
break;
case IOMMU_INTERRUPTS_EN:
- ret = amd_iommu_init_dma_ops();
- init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
- break;
- case IOMMU_DMA_OPS:
init_state = IOMMU_INITIALIZED;
break;
case IOMMU_INITIALIZED:
@@ -3100,6 +3100,8 @@ static int __init parse_amd_iommu_options(char *str)
for (; *str; ++str) {
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
+ if (strncmp(str, "force_enable", 12) == 0)
+ amd_iommu_force_enable = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
if (strncmp(str, "force_isolation", 15) == 0)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3ac42bbdefc6..811a49a95d04 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -30,7 +30,6 @@
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
-#include <linux/iova.h>
#include <linux/io-pgtable.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
@@ -1713,7 +1712,7 @@ static void amd_iommu_probe_finalize(struct device *dev)
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
if (domain->type == IOMMU_DOMAIN_DMA)
- iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
}
@@ -1773,13 +1772,22 @@ void amd_iommu_domain_update(struct protection_domain *domain)
amd_iommu_domain_flush_complete(domain);
}
+static void __init amd_iommu_init_dma_ops(void)
+{
+ swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
+
+ if (amd_iommu_unmap_flush)
+ pr_info("IO/TLB flush on unmap enabled\n");
+ else
+ pr_info("Lazy IO/TLB flushing enabled\n");
+ iommu_set_dma_strict(amd_iommu_unmap_flush);
+}
+
int __init amd_iommu_init_api(void)
{
- int ret, err = 0;
+ int err;
- ret = iova_cache_get();
- if (ret)
- return ret;
+ amd_iommu_init_dma_ops();
err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
if (err)
@@ -1796,19 +1804,6 @@ int __init amd_iommu_init_api(void)
return 0;
}
-int __init amd_iommu_init_dma_ops(void)
-{
- swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
-
- if (amd_iommu_unmap_flush)
- pr_info("IO/TLB flush on unmap enabled\n");
- else
- pr_info("Lazy IO/TLB flushing enabled\n");
- iommu_set_dma_strict(amd_iommu_unmap_flush);
- return 0;
-
-}
-
/*****************************************************************************
*
* The following functions belong to the exported interface of AMD IOMMU
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index bb251cab61f3..ee66d1f4cb81 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
return true;
}
-static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
{
- return false;
+ /* We're not keeping track of SIDs in fault events */
+ if (master->num_streams != 1)
+ return false;
+
+ return master->stall_enabled;
}
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
@@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
return false;
- /* SSID and IOPF support are mandatory for the moment */
- return master->ssid_bits && arm_smmu_iopf_supported(master);
+ /* SSID support is mandatory for the moment */
+ return master->ssid_bits;
}
bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
@@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
return enabled;
}
+static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
+{
+ int ret;
+ struct device *dev = master->dev;
+
+ /*
+ * Drivers for devices supporting PRI or stall should enable IOPF first.
+ * Others have device-specific fault handlers and don't need IOPF.
+ */
+ if (!arm_smmu_master_iopf_supported(master))
+ return 0;
+
+ if (!master->iopf_enabled)
+ return -EINVAL;
+
+ ret = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
+ if (ret)
+ return ret;
+
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+ if (ret) {
+ iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+ return ret;
+ }
+ return 0;
+}
+
+static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
+{
+ struct device *dev = master->dev;
+
+ if (!master->iopf_enabled)
+ return;
+
+ iommu_unregister_device_fault_handler(dev);
+ iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
+}
+
int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
{
+ int ret;
+
mutex_lock(&sva_lock);
- master->sva_enabled = true;
+ ret = arm_smmu_master_sva_enable_iopf(master);
+ if (!ret)
+ master->sva_enabled = true;
mutex_unlock(&sva_lock);
- return 0;
+ return ret;
}
int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
@@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
mutex_unlock(&sva_lock);
return -EBUSY;
}
+ arm_smmu_master_sva_disable_iopf(master);
master->sva_enabled = false;
mutex_unlock(&sva_lock);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 54b2f27b81d4..dd20b01771c4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -23,7 +23,6 @@
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
@@ -32,6 +31,7 @@
#include <linux/amba/bus.h>
#include "arm-smmu-v3.h"
+#include "../../iommu-sva-lib.h"
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
@@ -313,6 +313,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
}
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
break;
+ case CMDQ_OP_RESUME:
+ cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
+ cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
+ cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
+ break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@@ -352,7 +357,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
- static const char *cerror_str[] = {
+ static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
[CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
@@ -876,6 +881,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
+static int arm_smmu_page_response(struct device *dev,
+ struct iommu_fault_event *unused,
+ struct iommu_page_response *resp)
+{
+ struct arm_smmu_cmdq_ent cmd = {0};
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ int sid = master->streams[0].id;
+
+ if (master->stall_enabled) {
+ cmd.opcode = CMDQ_OP_RESUME;
+ cmd.resume.sid = sid;
+ cmd.resume.stag = resp->grpid;
+ switch (resp->code) {
+ case IOMMU_PAGE_RESP_INVALID:
+ case IOMMU_PAGE_RESP_FAILURE:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
+ break;
+ case IOMMU_PAGE_RESP_SUCCESS:
+ cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ return -ENODEV;
+ }
+
+ arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
+ /*
+ * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
+ * RESUME consumption guarantees that the stalled transaction will be
+ * terminated... at some point in the future. PRI_RESP is fire and
+ * forget.
+ */
+
+ return 0;
+}
+
/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
@@ -986,7 +1029,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
return -E2BIG;
@@ -1031,8 +1073,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
- /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
- if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+ if (smmu_domain->stall_enabled)
val |= CTXDESC_CD_0_S;
}
@@ -1276,7 +1317,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
FIELD_PREP(STRTAB_STE_1_STRW, strw));
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
- !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
+ !master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
@@ -1353,7 +1394,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return 0;
}
-__maybe_unused
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
@@ -1377,18 +1417,118 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
}
/* IRQ and event handlers */
+static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+{
+ int ret;
+ u32 reason;
+ u32 perm = 0;
+ struct arm_smmu_master *master;
+ bool ssid_valid = evt[0] & EVTQ_0_SSV;
+ u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
+ struct iommu_fault_event fault_evt = { };
+ struct iommu_fault *flt = &fault_evt.fault;
+
+ switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+ case EVT_ID_TRANSLATION_FAULT:
+ reason = IOMMU_FAULT_REASON_PTE_FETCH;
+ break;
+ case EVT_ID_ADDR_SIZE_FAULT:
+ reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
+ break;
+ case EVT_ID_ACCESS_FAULT:
+ reason = IOMMU_FAULT_REASON_ACCESS;
+ break;
+ case EVT_ID_PERMISSION_FAULT:
+ reason = IOMMU_FAULT_REASON_PERMISSION;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Stage-2 is always pinned at the moment */
+ if (evt[1] & EVTQ_1_S2)
+ return -EFAULT;
+
+ if (evt[1] & EVTQ_1_RnW)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
+
+ if (evt[1] & EVTQ_1_InD)
+ perm |= IOMMU_FAULT_PERM_EXEC;
+
+ if (evt[1] & EVTQ_1_PnU)
+ perm |= IOMMU_FAULT_PERM_PRIV;
+
+ if (evt[1] & EVTQ_1_STALL) {
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request) {
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
+ .perm = perm,
+ .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+ };
+
+ if (ssid_valid) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+ }
+ } else {
+ flt->type = IOMMU_FAULT_DMA_UNRECOV;
+ flt->event = (struct iommu_fault_unrecoverable) {
+ .reason = reason,
+ .flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
+ .perm = perm,
+ .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
+ };
+
+ if (ssid_valid) {
+ flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
+ flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+ }
+ }
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, sid);
+ if (!master) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
+ /* Nobody cared, abort the access */
+ struct iommu_page_response resp = {
+ .pasid = flt->prm.pasid,
+ .grpid = flt->prm.grpid,
+ .code = IOMMU_PAGE_RESP_FAILURE,
+ };
+ arm_smmu_page_response(master->dev, &fault_evt, &resp);
+ }
+
+out_unlock:
+ mutex_unlock(&smmu->streams_mutex);
+ return ret;
+}
+
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
- int i;
+ int i, ret;
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
+ ret = arm_smmu_handle_evt(smmu, evt);
+ if (!ret || !__ratelimit(&rs))
+ continue;
+
dev_info(smmu->dev, "event 0x%02x received:\n", id);
for (i = 0; i < ARRAY_SIZE(evt); ++i)
dev_info(smmu->dev, "\t0x%016llx\n",
@@ -1923,6 +2063,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
cfg->s1cdmax = master->ssid_bits;
+ smmu_domain->stall_enabled = master->stall_enabled;
+
ret = arm_smmu_alloc_cd_tables(smmu_domain);
if (ret)
goto out_free_asid;
@@ -2270,6 +2412,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
ret = -EINVAL;
goto out_unlock;
+ } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
+ smmu_domain->stall_enabled != master->stall_enabled) {
+ dev_err(dev, "cannot attach to stall-%s domain\n",
+ smmu_domain->stall_enabled ? "enabled" : "disabled");
+ ret = -EINVAL;
+ goto out_unlock;
}
master->domain = smmu_domain;
@@ -2508,6 +2656,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
master->ssid_bits = min_t(u8, master->ssid_bits,
CTXDESC_LINEAR_CDMAX);
+ if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
+ device_property_read_bool(dev, "dma-can-stall")) ||
+ smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+ master->stall_enabled = true;
+
return &smmu->iommu;
err_free_master:
@@ -2525,7 +2678,8 @@ static void arm_smmu_release_device(struct device *dev)
return;
master = dev_iommu_priv_get(dev);
- WARN_ON(arm_smmu_master_sva_enabled(master));
+ if (WARN_ON(arm_smmu_master_sva_enabled(master)))
+ iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
@@ -2595,6 +2749,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev,
return false;
switch (feat) {
+ case IOMMU_DEV_FEAT_IOPF:
+ return arm_smmu_master_iopf_supported(master);
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_sva_supported(master);
default:
@@ -2611,6 +2767,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
return false;
switch (feat) {
+ case IOMMU_DEV_FEAT_IOPF:
+ return master->iopf_enabled;
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_sva_enabled(master);
default:
@@ -2621,6 +2779,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
static int arm_smmu_dev_enable_feature(struct device *dev,
enum iommu_dev_features feat)
{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
if (!arm_smmu_dev_has_feature(dev, feat))
return -ENODEV;
@@ -2628,8 +2788,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
return -EBUSY;
switch (feat) {
+ case IOMMU_DEV_FEAT_IOPF:
+ master->iopf_enabled = true;
+ return 0;
case IOMMU_DEV_FEAT_SVA:
- return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
+ return arm_smmu_master_enable_sva(master);
default:
return -EINVAL;
}
@@ -2638,12 +2801,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
static int arm_smmu_dev_disable_feature(struct device *dev,
enum iommu_dev_features feat)
{
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
if (!arm_smmu_dev_feature_enabled(dev, feat))
return -EINVAL;
switch (feat) {
+ case IOMMU_DEV_FEAT_IOPF:
+ if (master->sva_enabled)
+ return -EBUSY;
+ master->iopf_enabled = false;
+ return 0;
case IOMMU_DEV_FEAT_SVA:
- return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
+ return arm_smmu_master_disable_sva(master);
default:
return -EINVAL;
}
@@ -2673,6 +2843,7 @@ static struct iommu_ops arm_smmu_ops = {
.sva_bind = arm_smmu_sva_bind,
.sva_unbind = arm_smmu_sva_unbind,
.sva_get_pasid = arm_smmu_sva_get_pasid,
+ .page_response = arm_smmu_page_response,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
};
@@ -2771,6 +2942,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
+ if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
+ (smmu->features & ARM_SMMU_FEAT_STALLS)) {
+ smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
+ if (!smmu->evtq.iopf)
+ return -ENOMEM;
+ }
+
/* priq */
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
@@ -2788,10 +2966,8 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
void *strtab = smmu->strtab_cfg.strtab;
cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
- if (!cfg->l1_desc) {
- dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
+ if (!cfg->l1_desc)
return -ENOMEM;
- }
for (i = 0; i < cfg->num_l1_ents; ++i) {
arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
@@ -3582,10 +3758,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
bool bypass;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
- if (!smmu) {
- dev_err(dev, "failed to allocate arm_smmu_device\n");
+ if (!smmu)
return -ENOMEM;
- }
smmu->dev = dev;
if (dev->of_node) {
@@ -3669,10 +3843,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
- return ret;
+ goto err_sysfs_remove;
}
- return arm_smmu_set_bus_ops(&arm_smmu_ops);
+ ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
+ if (ret)
+ goto err_unregister_device;
+
+ return 0;
+
+err_unregister_device:
+ iommu_device_unregister(&smmu->iommu);
+err_sysfs_remove:
+ iommu_device_sysfs_remove(&smmu->iommu);
+ return ret;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -3683,6 +3867,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
iommu_device_unregister(&smmu->iommu);
iommu_device_sysfs_remove(&smmu->iommu);
arm_smmu_device_disable(smmu);
+ iopf_queue_free(smmu->evtq.iopf);
return 0;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 46e8c49214a8..4cb136f07914 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -184,6 +184,7 @@
#else
#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
#endif
+#define Q_MIN_SZ_SHIFT (PAGE_SHIFT)
/*
* Stream table.
@@ -354,6 +355,13 @@
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
+#define CMDQ_RESUME_0_RESP_TERM 0UL
+#define CMDQ_RESUME_0_RESP_RETRY 1UL
+#define CMDQ_RESUME_0_RESP_ABORT 2UL
+#define CMDQ_RESUME_0_RESP GENMASK_ULL(13, 12)
+#define CMDQ_RESUME_0_SID GENMASK_ULL(63, 32)
+#define CMDQ_RESUME_1_STAG GENMASK_ULL(15, 0)
+
#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE 0
#define CMDQ_SYNC_0_CS_IRQ 1
@@ -366,14 +374,33 @@
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
-#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
+#define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
#define EVTQ_0_ID GENMASK_ULL(7, 0)
+#define EVT_ID_TRANSLATION_FAULT 0x10
+#define EVT_ID_ADDR_SIZE_FAULT 0x11
+#define EVT_ID_ACCESS_FAULT 0x12
+#define EVT_ID_PERMISSION_FAULT 0x13
+
+#define EVTQ_0_SSV (1UL << 11)
+#define EVTQ_0_SSID GENMASK_ULL(31, 12)
+#define EVTQ_0_SID GENMASK_ULL(63, 32)
+#define EVTQ_1_STAG GENMASK_ULL(15, 0)
+#define EVTQ_1_STALL (1UL << 31)
+#define EVTQ_1_PnU (1UL << 33)
+#define EVTQ_1_InD (1UL << 34)
+#define EVTQ_1_RnW (1UL << 35)
+#define EVTQ_1_S2 (1UL << 39)
+#define EVTQ_1_CLASS GENMASK_ULL(41, 40)
+#define EVTQ_1_TT_READ (1UL << 44)
+#define EVTQ_2_ADDR GENMASK_ULL(63, 0)
+#define EVTQ_3_IPA GENMASK_ULL(51, 12)
+
/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT 4
#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
-#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
+#define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
#define PRIQ_0_SID GENMASK_ULL(31, 0)
#define PRIQ_0_SSID GENMASK_ULL(51, 32)
@@ -462,6 +489,13 @@ struct arm_smmu_cmdq_ent {
enum pri_resp resp;
} pri;
+ #define CMDQ_OP_RESUME 0x44
+ struct {
+ u32 sid;
+ u16 stag;
+ u8 resp;
+ } resume;
+
#define CMDQ_OP_CMD_SYNC 0x46
struct {
u64 msiaddr;
@@ -520,6 +554,7 @@ struct arm_smmu_cmdq_batch {
struct arm_smmu_evtq {
struct arm_smmu_queue q;
+ struct iopf_queue *iopf;
u32 max_stalls;
};
@@ -657,7 +692,9 @@ struct arm_smmu_master {
struct arm_smmu_stream *streams;
unsigned int num_streams;
bool ats_enabled;
+ bool stall_enabled;
bool sva_enabled;
+ bool iopf_enabled;
struct list_head bonds;
unsigned int ssid_bits;
};
@@ -675,6 +712,7 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
+ bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
@@ -716,6 +754,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
+bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
void *drvdata);
void arm_smmu_sva_unbind(struct iommu_sva *handle);
@@ -747,6 +786,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
return -ENODEV;
}
+static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
+{
+ return false;
+}
+
static inline struct iommu_sva *
arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 61fc645c1325..9b9d13ec5a88 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -3,6 +3,7 @@
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
*/
+#include <linux/acpi.h>
#include <linux/adreno-smmu-priv.h>
#include <linux/of_device.h>
#include <linux/qcom_scm.h>
@@ -177,6 +178,16 @@ static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_doma
return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
}
+static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
+{
+ const struct device_node *np = smmu->dev->of_node;
+
+ if (of_device_is_compatible(np, "qcom,msm8996-smmu-v2"))
+ return false;
+
+ return true;
+}
+
static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
@@ -191,7 +202,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
* be AARCH64 stage 1 but double check because the arm-smmu code assumes
* that is the case when the TTBR1 quirk is enabled
*/
- if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
+ if (qcom_adreno_can_do_ttbr1(smmu_domain->smmu) &&
+ (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
(smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64))
pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
@@ -216,6 +228,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,sc7180-mdss" },
{ .compatible = "qcom,sc7180-mss-pil" },
+ { .compatible = "qcom,sc7280-mdss" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
@@ -380,24 +393,48 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,msm8998-smmu-v2" },
{ .compatible = "qcom,sc7180-smmu-500" },
+ { .compatible = "qcom,sc7280-smmu-500" },
{ .compatible = "qcom,sc8180x-smmu-500" },
{ .compatible = "qcom,sdm630-smmu-v2" },
{ .compatible = "qcom,sdm845-smmu-500" },
+ { .compatible = "qcom,sm6125-smmu-500" },
{ .compatible = "qcom,sm8150-smmu-500" },
{ .compatible = "qcom,sm8250-smmu-500" },
{ .compatible = "qcom,sm8350-smmu-500" },
{ }
};
+#ifdef CONFIG_ACPI
+static struct acpi_platform_list qcom_acpi_platlist[] = {
+ { "LENOVO", "CB-01 ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
+ { "QCOM ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
+ { }
+};
+#endif
+
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
{
const struct device_node *np = smmu->dev->of_node;
- if (of_match_node(qcom_smmu_impl_of_match, np))
- return qcom_smmu_create(smmu, &qcom_smmu_impl);
+#ifdef CONFIG_ACPI
+ if (np == NULL) {
+ /* Match platform for ACPI boot */
+ if (acpi_match_platform_list(qcom_acpi_platlist) >= 0)
+ return qcom_smmu_create(smmu, &qcom_smmu_impl);
+ }
+#endif
+ /*
+ * Do not change this order of implementation, i.e., first adreno
+ * smmu impl and then apss smmu since we can have both implementing
+ * arm,mmu-500 in which case we will miss setting adreno smmu specific
+ * features if the order is changed.
+ */
if (of_device_is_compatible(np, "qcom,adreno-smmu"))
return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
+ if (of_match_node(qcom_smmu_impl_of_match, np))
+ return qcom_smmu_create(smmu, &qcom_smmu_impl);
+
return smmu;
}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index b4b32d31fc06..f22dbeb1e510 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -31,7 +31,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
-#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -74,7 +73,7 @@ static bool using_legacy_binding, using_generic_binding;
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
if (pm_runtime_enabled(smmu->dev))
- return pm_runtime_get_sync(smmu->dev);
+ return pm_runtime_resume_and_get(smmu->dev);
return 0;
}
@@ -1276,6 +1275,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
u64 phys;
unsigned long va, flags;
int ret, idx = cfg->cbndx;
+ phys_addr_t addr = 0;
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
@@ -1295,6 +1295,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
dev_err(dev,
"iova to phys timed out on %pad. Falling back to software table walk.\n",
&iova);
+ arm_smmu_rpm_put(smmu);
return ops->iova_to_phys(ops, iova);
}
@@ -1303,12 +1304,14 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
if (phys & ARM_SMMU_CB_PAR_F) {
dev_err(dev, "translation fault!\n");
dev_err(dev, "PAR = 0x%llx\n", phys);
- return 0;
+ goto out;
}
+ addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+out:
arm_smmu_rpm_put(smmu);
- return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+ return addr;
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
@@ -1455,6 +1458,18 @@ static void arm_smmu_release_device(struct device *dev)
iommu_fwspec_free(dev);
}
+static void arm_smmu_probe_finalize(struct device *dev)
+{
+ struct arm_smmu_master_cfg *cfg;
+ struct arm_smmu_device *smmu;
+
+ cfg = dev_iommu_priv_get(dev);
+ smmu = cfg->smmu;
+
+ if (smmu->impl && smmu->impl->probe_finalize)
+ smmu->impl->probe_finalize(smmu, dev);
+}
+
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
@@ -1574,6 +1589,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
+ .probe_finalize = arm_smmu_probe_finalize,
.device_group = arm_smmu_device_group,
.enable_nesting = arm_smmu_enable_nesting,
.set_pgtable_quirks = arm_smmu_set_pgtable_quirks,
@@ -2169,7 +2185,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (err) {
dev_err(dev, "Failed to register iommu\n");
- return err;
+ goto err_sysfs_remove;
}
platform_set_drvdata(pdev, smmu);
@@ -2192,10 +2208,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
* any device which might need it, so we want the bus ops in place
* ready to handle default domain setup as soon as any SMMU exists.
*/
- if (!using_legacy_binding)
- return arm_smmu_bus_init(&arm_smmu_ops);
+ if (!using_legacy_binding) {
+ err = arm_smmu_bus_init(&arm_smmu_ops);
+ if (err)
+ goto err_unregister_device;
+ }
return 0;
+
+err_unregister_device:
+ iommu_device_unregister(&smmu->iommu);
+err_sysfs_remove:
+ iommu_device_sysfs_remove(&smmu->iommu);
+ return err;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 84c21c4b0691..a50271595960 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -441,6 +441,7 @@ struct arm_smmu_impl {
struct device *dev, int start);
void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
+ void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
};
#define INVALID_SMENDX -1
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 4294abe389b2..25ed444ff94d 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -25,7 +25,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
-#include <linux/of_iommu.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
@@ -850,10 +849,12 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
- return ret;
+ goto err_sysfs_remove;
}
- bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
+ ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
+ if (ret)
+ goto err_unregister_device;
if (qcom_iommu->local_base) {
pm_runtime_get_sync(dev);
@@ -862,6 +863,13 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
}
return 0;
+
+err_unregister_device:
+ iommu_device_unregister(&qcom_iommu->iommu);
+
+err_sysfs_remove:
+ iommu_device_sysfs_remove(&qcom_iommu->iommu);
+ return ret;
}
static int qcom_iommu_device_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7bcdd1205535..98ba927aee1a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -243,9 +243,11 @@ resv_iova:
lo = iova_pfn(iovad, start);
hi = iova_pfn(iovad, end);
reserve_iova(iovad, lo, hi);
- } else {
+ } else if (end < start) {
/* dma_ranges list should be sorted */
- dev_err(&dev->dev, "Failed to reserve IOVA\n");
+ dev_err(&dev->dev,
+ "Failed to reserve IOVA [%pa-%pa]\n",
+ &start, &end);
return -EINVAL;
}
@@ -319,16 +321,16 @@ static bool dev_is_untrusted(struct device *dev)
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
- * @size: Size of IOVA space
+ * @limit: Last address of the IOVA space
* @dev: Device the domain is being initialised for
*
- * @base and @size should be exact multiples of IOMMU page granularity to
+ * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
* avoid rounding surprises. If necessary, we reserve the page at address 0
* to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
* any change which could make prior IOVAs invalid will fail.
*/
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
- u64 size, struct device *dev)
+ dma_addr_t limit, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
unsigned long order, base_pfn;
@@ -346,7 +348,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* Check the domain allows at least some access to the device... */
if (domain->geometry.force_aperture) {
if (base > domain->geometry.aperture_end ||
- base + size <= domain->geometry.aperture_start) {
+ limit < domain->geometry.aperture_start) {
pr_warn("specified DMA range outside IOMMU capability\n");
return -EFAULT;
}
@@ -1308,7 +1310,7 @@ static const struct dma_map_ops iommu_dma_ops = {
* The IOMMU core code allocates the default DMA domain, which the underlying
* IOMMU driver needs to support via the dma-iommu layer.
*/
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -1320,7 +1322,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
* underlying IOMMU driver needs to support via the dma-iommu layer.
*/
if (domain->type == IOMMU_DOMAIN_DMA) {
- if (iommu_dma_init_domain(domain, dma_base, size, dev))
+ if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
goto out_err;
dev->dma_ops = &iommu_dma_ops;
}
@@ -1330,6 +1332,7 @@ out_err:
pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
dev_name(dev));
}
+EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 7623d8c371f5..d0fbf1d10e18 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -17,7 +17,6 @@
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/of.h>
-#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 28a3d1596c76..43ebd8af11c5 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -3,6 +3,9 @@
config DMAR_TABLE
bool
+config DMAR_PERF
+ bool
+
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -14,6 +17,7 @@ config INTEL_IOMMU
select SWIOTLB
select IOASID
select IOMMU_DMA
+ select PCI_ATS
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
@@ -24,6 +28,7 @@ config INTEL_IOMMU
config INTEL_IOMMU_DEBUGFS
bool "Export Intel IOMMU internals in Debugfs"
depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ select DMAR_PERF
help
!!!WARNING!!!
@@ -41,6 +46,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
+ select IOMMU_SVA_LIB
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index ae236ec7d219..fa0dae16441c 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
+obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index efea7f02abd9..62e23ff3c987 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -16,6 +16,7 @@
#include <asm/irq_remapping.h>
#include "pasid.h"
+#include "perf.h"
struct tbl_walk {
u16 bus;
@@ -31,6 +32,9 @@ struct iommu_regset {
const char *regs;
};
+#define DEBUG_BUFFER_SIZE 1024
+static char debug_buf[DEBUG_BUFFER_SIZE];
+
#define IOMMU_REGSET_ENTRY(_reg_) \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
@@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
#endif
+static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu,
+ struct dmar_drhd_unit *drhd)
+{
+ int ret;
+
+ seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+ iommu->name, drhd->reg_base_addr);
+
+ ret = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
+ if (ret < 0)
+ seq_puts(m, "Failed to get latency snapshot");
+ else
+ seq_puts(m, debug_buf);
+ seq_puts(m, "\n");
+}
+
+static int latency_show(struct seq_file *m, void *v)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ latency_show_one(m, iommu, drhd);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int dmar_perf_latency_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, latency_show, NULL);
+}
+
+static ssize_t dmar_perf_latency_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ int counting;
+ char buf[64];
+
+ if (cnt > 63)
+ cnt = 63;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ if (kstrtoint(buf, 0, &counting))
+ return -EINVAL;
+
+ switch (counting) {
+ case 0:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
+ dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
+ dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
+ }
+ rcu_read_unlock();
+ break;
+ case 1:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_IOTLB);
+ rcu_read_unlock();
+ break;
+ case 2:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_DEVTLB);
+ rcu_read_unlock();
+ break;
+ case 3:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_INV_IEC);
+ rcu_read_unlock();
+ break;
+ case 4:
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd)
+ dmar_latency_enable(iommu, DMAR_LATENCY_PRQ);
+ rcu_read_unlock();
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+ return cnt;
+}
+
+static const struct file_operations dmar_perf_latency_fops = {
+ .open = dmar_perf_latency_open,
+ .write = dmar_perf_latency_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void __init intel_iommu_debugfs_init(void)
{
struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
@@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
#endif
+ debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
+ NULL, &dmar_perf_latency_fops);
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 84057cb9596c..d66f79acd14d 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -34,6 +34,7 @@
#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
+#include "perf.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -1342,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options)
{
struct q_inval *qi = iommu->qi;
+ s64 devtlb_start_ktime = 0;
+ s64 iotlb_start_ktime = 0;
+ s64 iec_start_ktime = 0;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
int offset, shift;
int rc, i;
+ u64 type;
if (!qi)
return 0;
+ type = desc->qw0 & GENMASK_ULL(3, 0);
+
+ if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
+ iotlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
+ devtlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if (type == QI_IEC_TYPE &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
+ iec_start_ktime = ktime_to_ns(ktime_get());
+
restart:
rc = 0;
@@ -1425,6 +1444,18 @@ restart:
if (rc == -EAGAIN)
goto restart;
+ if (iotlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
+ ktime_to_ns(ktime_get()) - iotlb_start_ktime);
+
+ if (devtlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
+ ktime_to_ns(ktime_get()) - devtlb_start_ktime);
+
+ if (iec_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
+ ktime_to_ns(ktime_get()) - iec_start_ktime);
+
return rc;
}
@@ -1913,16 +1944,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);
if (fault_type == INTR_REMAP)
- pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
- source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr >> 48,
- fault_reason, reason);
- else
- pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
+ pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr >> 48,
+ fault_reason, reason);
+ else if (pasid == INVALID_IOASID)
+ pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), pasid, addr,
+ PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
+ else
+ pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+ type ? "DMA Read" : "DMA Write", pasid,
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr,
+ fault_reason, reason);
+
return 0;
}
@@ -1989,7 +2027,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
if (!ratelimited)
/* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
- pasid_present ? pasid : -1,
+ pasid_present ? pasid : INVALID_IOASID,
source_id, guest_addr);
fault_index++;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index be35284a2016..a6a07d985709 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -46,6 +46,7 @@
#include <asm/iommu.h>
#include "../irq_remapping.h"
+#include "../iommu-sva-lib.h"
#include "pasid.h"
#include "cap_audit.h"
@@ -564,7 +565,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
unsigned long sagaw;
- int agaw = -1;
+ int agaw;
sagaw = cap_sagaw(iommu->cap);
for (agaw = width_to_agaw(max_gaw);
@@ -625,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
bool found = false;
int i;
- domain->iommu_coherency = 1;
+ domain->iommu_coherency = true;
for_each_domain_iommu(i, domain) {
found = true;
if (!iommu_paging_structure_coherency(g_iommus[i])) {
- domain->iommu_coherency = 0;
+ domain->iommu_coherency = false;
break;
}
}
@@ -641,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (!iommu_paging_structure_coherency(iommu)) {
- domain->iommu_coherency = 0;
+ domain->iommu_coherency = false;
break;
}
}
rcu_read_unlock();
}
-static int domain_update_iommu_snooping(struct intel_iommu *skip)
+static bool domain_update_iommu_snooping(struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int ret = 1;
+ bool ret = true;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@@ -665,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
*/
if (!sm_supported(iommu) &&
!ecap_sc_support(iommu->ecap)) {
- ret = 0;
+ ret = false;
break;
}
}
@@ -682,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
struct intel_iommu *iommu;
int mask = 0x3;
- if (!intel_iommu_superpage) {
+ if (!intel_iommu_superpage)
return 0;
- }
/* set iommu_superpage to the smallest common denominator */
rcu_read_lock();
@@ -1919,7 +1919,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] += 1;
- domain->iommu_count += 1;
if (domain->iommu_refcnt[iommu->seq_id] == 1) {
ndomains = cap_ndoms(iommu->cap);
num = find_first_zero_bit(iommu->domain_ids, ndomains);
@@ -1927,7 +1926,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
if (num >= ndomains) {
pr_err("%s: No free domain ids\n", iommu->name);
domain->iommu_refcnt[iommu->seq_id] -= 1;
- domain->iommu_count -= 1;
return -ENOSPC;
}
@@ -1943,16 +1941,15 @@ static int domain_attach_iommu(struct dmar_domain *domain,
return 0;
}
-static int domain_detach_iommu(struct dmar_domain *domain,
- struct intel_iommu *iommu)
+static void domain_detach_iommu(struct dmar_domain *domain,
+ struct intel_iommu *iommu)
{
- int num, count;
+ int num;
assert_spin_locked(&device_domain_lock);
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] -= 1;
- count = --domain->iommu_count;
if (domain->iommu_refcnt[iommu->seq_id] == 0) {
num = domain->iommu_did[iommu->seq_id];
clear_bit(num, iommu->domain_ids);
@@ -1961,8 +1958,6 @@ static int domain_detach_iommu(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
domain->iommu_did[iommu->seq_id] = 0;
}
-
- return count;
}
static inline int guestwidth_to_adjustwidth(int gaw)
@@ -4138,62 +4133,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
return container_of(iommu_dev, struct intel_iommu, iommu);
}
-static ssize_t intel_iommu_show_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
u32 ver = readl(iommu->reg + DMAR_VER_REG);
return sprintf(buf, "%d:%d\n",
DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
-static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
+static DEVICE_ATTR_RO(version);
-static ssize_t intel_iommu_show_address(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t address_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->reg_phys);
}
-static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
+static DEVICE_ATTR_RO(address);
-static ssize_t intel_iommu_show_cap(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t cap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->cap);
}
-static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
+static DEVICE_ATTR_RO(cap);
-static ssize_t intel_iommu_show_ecap(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t ecap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->ecap);
}
-static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
+static DEVICE_ATTR_RO(ecap);
-static ssize_t intel_iommu_show_ndoms(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t domains_supported_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
-static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
+static DEVICE_ATTR_RO(domains_supported);
-static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t domains_used_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
cap_ndoms(iommu->cap)));
}
-static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
+static DEVICE_ATTR_RO(domains_used);
static struct attribute *intel_iommu_attrs[] = {
&dev_attr_version.attr,
@@ -4511,13 +4500,13 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
- domain->iommu_coherency = 0;
- domain->iommu_snooping = 0;
+ domain->iommu_coherency = false;
+ domain->iommu_snooping = false;
domain->iommu_superpage = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+ domain->pgd = alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -4757,6 +4746,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (!iommu)
return -ENODEV;
+ if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
+ !ecap_nest(iommu->ecap)) {
+ dev_err(dev, "%s: iommu not support nested translation\n",
+ iommu->name);
+ return -EINVAL;
+ }
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -4778,8 +4774,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
- dmar_domain->pgd = (struct dma_pte *)
- phys_to_virt(dma_pte_addr(pte));
+ dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
free_pgtable_page(pte);
}
dmar_domain->agaw--;
@@ -5129,7 +5124,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
- return domain_update_iommu_snooping(NULL) == 1;
+ return domain_update_iommu_snooping(NULL);
if (cap == IOMMU_CAP_INTR_REMAP)
return irq_remapping_enabled == 1;
@@ -5165,13 +5160,10 @@ static void intel_iommu_release_device(struct device *dev)
static void intel_iommu_probe_finalize(struct device *dev)
{
- dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
if (domain && domain->type == IOMMU_DOMAIN_DMA)
- iommu_setup_dma_ops(dev, base,
- __DOMAIN_MAX_ADDR(dmar_domain->gaw) - base);
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
}
@@ -5331,6 +5323,48 @@ static int intel_iommu_disable_auxd(struct device *dev)
return 0;
}
+static int intel_iommu_enable_sva(struct device *dev)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ struct intel_iommu *iommu;
+ int ret;
+
+ if (!info || dmar_disabled)
+ return -EINVAL;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
+ return -ENODEV;
+
+ if (intel_iommu_enable_pasid(iommu, dev))
+ return -ENODEV;
+
+ if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ return -EINVAL;
+
+ ret = iopf_queue_add_device(iommu->iopf_queue, dev);
+ if (!ret)
+ ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
+
+ return ret;
+}
+
+static int intel_iommu_disable_sva(struct device *dev)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ ret = iommu_unregister_device_fault_handler(dev);
+ if (!ret)
+ ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
+
+ return ret;
+}
+
/*
* A PCI express designated vendor specific extended capability is defined
* in the section 3.7 of Intel scalable I/O virtualization technical spec
@@ -5392,35 +5426,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
- if (feat == IOMMU_DEV_FEAT_AUX)
+ switch (feat) {
+ case IOMMU_DEV_FEAT_AUX:
return intel_iommu_enable_auxd(dev);
- if (feat == IOMMU_DEV_FEAT_IOPF)
+ case IOMMU_DEV_FEAT_IOPF:
return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
- if (feat == IOMMU_DEV_FEAT_SVA) {
- struct device_domain_info *info = get_domain_info(dev);
-
- if (!info)
- return -EINVAL;
+ case IOMMU_DEV_FEAT_SVA:
+ return intel_iommu_enable_sva(dev);
- if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
- return -EINVAL;
-
- if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
- return 0;
+ default:
+ return -ENODEV;
}
-
- return -ENODEV;
}
static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
- if (feat == IOMMU_DEV_FEAT_AUX)
+ switch (feat) {
+ case IOMMU_DEV_FEAT_AUX:
return intel_iommu_disable_auxd(dev);
- return -ENODEV;
+ case IOMMU_DEV_FEAT_IOPF:
+ return 0;
+
+ case IOMMU_DEV_FEAT_SVA:
+ return intel_iommu_disable_sva(dev);
+
+ default:
+ return -ENODEV;
+ }
}
static bool
@@ -5457,7 +5493,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain)
int ret = -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
- if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+ if (list_empty(&dmar_domain->devices)) {
dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
ret = 0;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 72dc84821dad..c6cf44a6c923 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* intel-pasid.c - PASID idr, table and entry manipulation
*
* Copyright (C) 2018 Intel Corporation
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
new file mode 100644
index 000000000000..73b7ec705552
--- /dev/null
+++ b/drivers/iommu/intel/perf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * perf.c - performance monitor
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/intel-iommu.h>
+
+#include "perf.h"
+
+static DEFINE_SPINLOCK(latency_lock);
+
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+
+ return lstat && lstat[type].enabled;
+}
+
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat;
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ if (dmar_latency_enabled(iommu, type))
+ return 0;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (!iommu->perf_statistic) {
+ iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
+ GFP_ATOMIC);
+ if (!iommu->perf_statistic) {
+ ret = -ENOMEM;
+ goto unlock_out;
+ }
+ }
+
+ lstat = iommu->perf_statistic;
+
+ if (!lstat[type].enabled) {
+ lstat[type].enabled = true;
+ lstat[type].counter[COUNTS_MIN] = UINT_MAX;
+ ret = 0;
+ }
+unlock_out:
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return ret;
+}
+
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM);
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ u64 min, max;
+
+ if (!dmar_latency_enabled(iommu, type))
+ return;
+
+ spin_lock_irqsave(&latency_lock, flags);
+ if (latency < 100)
+ lstat[type].counter[COUNTS_10e2]++;
+ else if (latency < 1000)
+ lstat[type].counter[COUNTS_10e3]++;
+ else if (latency < 10000)
+ lstat[type].counter[COUNTS_10e4]++;
+ else if (latency < 100000)
+ lstat[type].counter[COUNTS_10e5]++;
+ else if (latency < 1000000)
+ lstat[type].counter[COUNTS_10e6]++;
+ else if (latency < 10000000)
+ lstat[type].counter[COUNTS_10e7]++;
+ else
+ lstat[type].counter[COUNTS_10e8_plus]++;
+
+ min = lstat[type].counter[COUNTS_MIN];
+ max = lstat[type].counter[COUNTS_MAX];
+ lstat[type].counter[COUNTS_MIN] = min_t(u64, min, latency);
+ lstat[type].counter[COUNTS_MAX] = max_t(u64, max, latency);
+ lstat[type].counter[COUNTS_SUM] += latency;
+ lstat[type].samples++;
+ spin_unlock_irqrestore(&latency_lock, flags);
+}
+
+static char *latency_counter_names[] = {
+ " <0.1us",
+ " 0.1us-1us", " 1us-10us", " 10us-100us",
+ " 100us-1ms", " 1ms-10ms", " >=10ms",
+ " min(us)", " max(us)", " average(us)"
+};
+
+static char *latency_type_names[] = {
+ " inv_iotlb", " inv_devtlb", " inv_iec",
+ " svm_prq"
+};
+
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+ struct latency_statistic *lstat = iommu->perf_statistic;
+ unsigned long flags;
+ int bytes = 0, i, j;
+
+ memset(str, 0, size);
+
+ for (i = 0; i < COUNTS_NUM; i++)
+ bytes += snprintf(str + bytes, size - bytes,
+ "%s", latency_counter_names[i]);
+
+ spin_lock_irqsave(&latency_lock, flags);
+ for (i = 0; i < DMAR_LATENCY_NUM; i++) {
+ if (!dmar_latency_enabled(iommu, i))
+ continue;
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "\n%s", latency_type_names[i]);
+
+ for (j = 0; j < COUNTS_NUM; j++) {
+ u64 val = lstat[i].counter[j];
+
+ switch (j) {
+ case COUNTS_MIN:
+ if (val == UINT_MAX)
+ val = 0;
+ else
+ val = div_u64(val, 1000);
+ break;
+ case COUNTS_MAX:
+ val = div_u64(val, 1000);
+ break;
+ case COUNTS_SUM:
+ if (lstat[i].samples)
+ val = div_u64(val, (lstat[i].samples * 1000));
+ else
+ val = 0;
+ break;
+ default:
+ break;
+ }
+
+ bytes += snprintf(str + bytes, size - bytes,
+ "%12lld", val);
+ }
+ }
+ spin_unlock_irqrestore(&latency_lock, flags);
+
+ return bytes;
+}
diff --git a/drivers/iommu/intel/perf.h b/drivers/iommu/intel/perf.h
new file mode 100644
index 000000000000..fd6db8049d1a
--- /dev/null
+++ b/drivers/iommu/intel/perf.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * perf.h - performance monitor header
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+enum latency_type {
+ DMAR_LATENCY_INV_IOTLB = 0,
+ DMAR_LATENCY_INV_DEVTLB,
+ DMAR_LATENCY_INV_IEC,
+ DMAR_LATENCY_PRQ,
+ DMAR_LATENCY_NUM
+};
+
+enum latency_count {
+ COUNTS_10e2 = 0, /* < 0.1us */
+ COUNTS_10e3, /* 0.1us ~ 1us */
+ COUNTS_10e4, /* 1us ~ 10us */
+ COUNTS_10e5, /* 10us ~ 100us */
+ COUNTS_10e6, /* 100us ~ 1ms */
+ COUNTS_10e7, /* 1ms ~ 10ms */
+ COUNTS_10e8_plus, /* 10ms and plus*/
+ COUNTS_MIN,
+ COUNTS_MAX,
+ COUNTS_SUM,
+ COUNTS_NUM
+};
+
+struct latency_statistic {
+ bool enabled;
+ u64 counter[COUNTS_NUM];
+ u64 samples;
+};
+
+#ifdef CONFIG_DMAR_PERF
+int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
+bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
+void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
+ u64 latency);
+int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
+#else
+static inline int
+dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
+{
+ return -EINVAL;
+}
+
+static inline void
+dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
+{
+}
+
+static inline bool
+dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
+{
+ return false;
+}
+
+static inline void
+dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
+{
+}
+
+static inline int
+dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
+{
+ return 0;
+}
+#endif /* CONFIG_DMAR_PERF */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 5165cea90421..9b0f22bc0514 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -17,19 +17,76 @@
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
+#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
+#include <trace/events/intel_iommu.h>
#include "pasid.h"
+#include "perf.h"
+#include "../iommu-sva-lib.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
+#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
#define PRQ_ORDER 0
+static DEFINE_XARRAY_ALLOC(pasid_private_array);
+static int pasid_private_add(ioasid_t pasid, void *priv)
+{
+ return xa_alloc(&pasid_private_array, &pasid, priv,
+ XA_LIMIT(pasid, pasid), GFP_ATOMIC);
+}
+
+static void pasid_private_remove(ioasid_t pasid)
+{
+ xa_erase(&pasid_private_array, pasid);
+}
+
+static void *pasid_private_find(ioasid_t pasid)
+{
+ return xa_load(&pasid_private_array, pasid);
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == sid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
+static struct intel_svm_dev *
+svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
+{
+ struct intel_svm_dev *sdev = NULL, *t;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->dev == dev) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return sdev;
+}
+
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
+ struct iopf_queue *iopfq;
struct page *pages;
int irq, ret;
@@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
- err:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
- iommu->prq = NULL;
- return ret;
+ goto free_prq;
}
iommu->pr_irq = irq;
+ snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
+ "dmar%d-iopfq", iommu->seq_id);
+ iopfq = iopf_queue_alloc(iommu->iopfq_name);
+ if (!iopfq) {
+ pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
+ ret = -ENOMEM;
+ goto free_hwirq;
+ }
+ iommu->iopf_queue = iopfq;
+
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
@@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
- dmar_free_hwirq(irq);
- iommu->pr_irq = 0;
- goto err;
+ goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
@@ -71,6 +133,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
init_completion(&iommu->prq_complete);
return 0;
+
+free_iopfq:
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+free_hwirq:
+ dmar_free_hwirq(irq);
+ iommu->pr_irq = 0;
+free_prq:
+ free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu->prq = NULL;
+
+ return ret;
}
int intel_svm_finish_prq(struct intel_iommu *iommu)
@@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->pr_irq = 0;
}
+ if (iommu->iopf_queue) {
+ iopf_queue_free(iommu->iopf_queue);
+ iommu->iopf_queue = NULL;
+ }
+
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
@@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static DEFINE_MUTEX(pasid_mutex);
-static LIST_HEAD(global_svm_list);
-
-#define for_each_svm_dev(sdev, svm, d) \
- list_for_each_entry((sdev), &(svm)->devs, list) \
- if ((d) != (sdev)->dev) {} else
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm **rsvm,
struct intel_svm_dev **rsdev)
{
- struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm_dev *sdev = NULL;
struct intel_svm *svm;
/* The caller should hold the pasid_mutex lock */
@@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
return -EINVAL;
- svm = ioasid_find(NULL, pasid, NULL);
+ svm = pasid_private_find(pasid);
if (IS_ERR(svm))
return PTR_ERR(svm);
@@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
*/
if (WARN_ON(list_empty(&svm->devs)))
return -EINVAL;
-
- rcu_read_lock();
- list_for_each_entry_rcu(d, &svm->devs, list) {
- if (d->dev == dev) {
- sdev = d;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_dev(svm, dev);
out:
*rsvm = svm;
@@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
svm->gpasid = data->gpasid;
svm->flags |= SVM_FLAG_GUEST_PASID;
}
- ioasid_set_data(data->hpasid, svm);
+ pasid_private_add(data->hpasid, svm);
INIT_LIST_HEAD_RCU(&svm->devs);
mmput(svm->mm);
}
@@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
list_add_rcu(&sdev->list, &svm->devs);
out:
if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
- ioasid_set_data(data->hpasid, NULL);
+ pasid_private_remove(data->hpasid);
kfree(svm);
}
@@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
* the unbind, IOMMU driver will get notified
* and perform cleanup.
*/
- ioasid_set_data(pasid, NULL);
+ pasid_private_remove(pasid);
kfree(svm);
}
}
@@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
mutex_unlock(&mm->context.lock);
}
-/* Caller must hold pasid_mutex, mm reference */
-static int
-intel_svm_bind_mm(struct device *dev, unsigned int flags,
- struct mm_struct *mm, struct intel_svm_dev **sd)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
+ unsigned int flags)
{
- struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
- struct intel_svm *svm = NULL, *t;
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
- unsigned long iflags;
- int pasid_max;
- int ret;
+ ioasid_t max_pasid = dev_is_pci(dev) ?
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
- if (!iommu || dmar_disabled)
- return -EINVAL;
+ return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
+}
- if (!intel_svm_capable(iommu))
- return -ENOTSUPP;
+static void intel_svm_free_pasid(struct mm_struct *mm)
+{
+ iommu_sva_free_pasid(mm);
+}
- if (dev_is_pci(dev)) {
- pasid_max = pci_max_pasids(to_pci_dev(dev));
- if (pasid_max < 0)
- return -EINVAL;
- } else
- pasid_max = 1 << 20;
+static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
+ struct device *dev,
+ struct mm_struct *mm,
+ unsigned int flags)
+{
+ struct device_domain_info *info = get_domain_info(dev);
+ unsigned long iflags, sflags;
+ struct intel_svm_dev *sdev;
+ struct intel_svm *svm;
+ int ret = 0;
- /* Bind supervisor PASID shuld have mm = NULL */
- if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap) || mm) {
- pr_err("Supervisor PASID with user provided mm.\n");
- return -EINVAL;
- }
- }
+ svm = pasid_private_find(mm->pasid);
+ if (!svm) {
+ svm = kzalloc(sizeof(*svm), GFP_KERNEL);
+ if (!svm)
+ return ERR_PTR(-ENOMEM);
- list_for_each_entry(t, &global_svm_list, list) {
- if (t->mm != mm)
- continue;
+ svm->pasid = mm->pasid;
+ svm->mm = mm;
+ svm->flags = flags;
+ INIT_LIST_HEAD_RCU(&svm->devs);
- svm = t;
- if (svm->pasid >= pasid_max) {
- dev_warn(dev,
- "Limited PASID width. Cannot use existing PASID %d\n",
- svm->pasid);
- ret = -ENOSPC;
- goto out;
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
+ svm->notifier.ops = &intel_mmuops;
+ ret = mmu_notifier_register(&svm->notifier, mm);
+ if (ret) {
+ kfree(svm);
+ return ERR_PTR(ret);
+ }
}
- /* Find the matching device in svm list */
- for_each_svm_dev(sdev, svm, dev) {
- sdev->users++;
- goto success;
+ ret = pasid_private_add(svm->pasid, svm);
+ if (ret) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ kfree(svm);
+ return ERR_PTR(ret);
}
+ }
- break;
+ /* Find the matching device in svm list */
+ sdev = svm_lookup_device_by_dev(svm, dev);
+ if (sdev) {
+ sdev->users++;
+ goto success;
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
- goto out;
+ goto free_svm;
}
+
sdev->dev = dev;
sdev->iommu = iommu;
-
- ret = intel_iommu_enable_pasid(iommu, dev);
- if (ret) {
- kfree(sdev);
- goto out;
- }
-
- info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
+ sdev->users = 1;
+ sdev->pasid = svm->pasid;
+ sdev->sva.dev = dev;
+ init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
@@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
sdev->qdep = 0;
}
- /* Finish the setup now we know we're keeping it */
- sdev->users = 1;
- init_rcu_head(&sdev->rcu);
-
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm) {
- ret = -ENOMEM;
- kfree(sdev);
- goto out;
- }
-
- if (pasid_max > intel_pasid_max_id)
- pasid_max = intel_pasid_max_id;
+ /* Setup the pasid table: */
+ sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
+ PASID_FLAG_SUPERVISOR_MODE : 0;
+ sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+ spin_lock_irqsave(&iommu->lock, iflags);
+ ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
+ FLPT_DEFAULT_DID, sflags);
+ spin_unlock_irqrestore(&iommu->lock, iflags);
- /* Do not use PASID 0, reserved for RID to PASID */
- svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
- if (svm->pasid == INVALID_IOASID) {
- kfree(svm);
- kfree(sdev);
- ret = -ENOSPC;
- goto out;
- }
- svm->notifier.ops = &intel_mmuops;
- svm->mm = mm;
- svm->flags = flags;
- INIT_LIST_HEAD_RCU(&svm->devs);
- INIT_LIST_HEAD(&svm->list);
- ret = -ENOMEM;
- if (mm) {
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
- }
+ if (ret)
+ goto free_sdev;
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- if (mm)
- mmu_notifier_unregister(&svm->notifier, mm);
- ioasid_put(svm->pasid);
- kfree(svm);
- kfree(sdev);
- goto out;
- }
+ /* The newly allocated pasid is loaded to the mm. */
+ if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
+ load_pasid(mm, svm->pasid);
- list_add_tail(&svm->list, &global_svm_list);
- if (mm) {
- /* The newly allocated pasid is loaded to the mm. */
- load_pasid(mm, svm->pasid);
- }
- } else {
- /*
- * Binding a new device with existing PASID, need to setup
- * the PASID entry.
- */
- spin_lock_irqsave(&iommu->lock, iflags);
- ret = intel_pasid_setup_first_level(iommu, dev,
- mm ? mm->pgd : init_mm.pgd,
- svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
- (cpu_feature_enabled(X86_FEATURE_LA57) ?
- PASID_FLAG_FL5LP : 0));
- spin_unlock_irqrestore(&iommu->lock, iflags);
- if (ret) {
- kfree(sdev);
- goto out;
- }
- }
list_add_rcu(&sdev->list, &svm->devs);
success:
- sdev->pasid = svm->pasid;
- sdev->sva.dev = dev;
- if (sd)
- *sd = sdev;
- ret = 0;
-out:
- return ret;
+ return &sdev->sva;
+
+free_sdev:
+ kfree(sdev);
+free_svm:
+ if (list_empty(&svm->devs)) {
+ if (svm->notifier.ops)
+ mmu_notifier_unregister(&svm->notifier, mm);
+ pasid_private_remove(mm->pasid);
+ kfree(svm);
+ }
+
+ return ERR_PTR(ret);
}
/* Caller must hold pasid_mutex */
@@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
+ struct mm_struct *mm;
int ret = -EINVAL;
iommu = device_to_iommu(dev, NULL, NULL);
@@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
if (ret)
goto out;
+ mm = svm->mm;
if (sdev) {
sdev->users--;
@@ -663,13 +675,13 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
- ioasid_put(svm->pasid);
- if (svm->mm) {
- mmu_notifier_unregister(&svm->notifier, svm->mm);
+ intel_svm_free_pasid(mm);
+ if (svm->notifier.ops) {
+ mmu_notifier_unregister(&svm->notifier, mm);
/* Clear mm's pasid. */
- load_pasid(svm->mm, PASID_DISABLED);
+ load_pasid(mm, PASID_DISABLED);
}
- list_del(&svm->list);
+ pasid_private_remove(svm->pasid);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
@@ -714,22 +726,6 @@ struct page_req_dsc {
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
-static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
-{
- unsigned long requested = 0;
-
- if (req->exe_req)
- requested |= VM_EXEC;
-
- if (req->rd_req)
- requested |= VM_READ;
-
- if (req->wr_req)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@@ -799,6 +795,8 @@ prq_retry:
goto prq_retry;
}
+ iopf_queue_flush_dev(dev);
+
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
@@ -841,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
return prot;
}
-static int
-intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
+ struct page_req_dsc *desc)
{
struct iommu_fault_event event;
@@ -872,159 +870,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
*/
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
- memcpy(event.fault.prm.private_data, desc->priv_data,
- sizeof(desc->priv_data));
+ event.fault.prm.private_data[0] = desc->priv_data[0];
+ event.fault.prm.private_data[1] = desc->priv_data[1];
+ } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
+ /*
+ * If the private data fields are not used by hardware, use it
+ * to monitor the prq handle latency.
+ */
+ event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
}
return iommu_report_device_fault(dev, &event);
}
+static void handle_bad_prq_event(struct intel_iommu *iommu,
+ struct page_req_dsc *req, int result)
+{
+ struct qi_desc desc;
+
+ pr_err("%s: Invalid page request: %08llx %08llx\n",
+ iommu->name, ((unsigned long long *)req)[0],
+ ((unsigned long long *)req)[1]);
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must
+ * respond with page group response if private data
+ * is present (PDP) or last page in group (LPIG) bit
+ * is set. This is an additional VT-d feature beyond
+ * PCI ATS spec.
+ */
+ if (!req->lpig && !req->priv_data_present)
+ return;
+
+ desc.qw0 = QI_PGRP_PASID(req->pasid) |
+ QI_PGRP_DID(req->rid) |
+ QI_PGRP_PASID_P(req->pasid_present) |
+ QI_PGRP_PDP(req->priv_data_present) |
+ QI_PGRP_RESP_CODE(result) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(req->prg_index) |
+ QI_PGRP_LPIG(req->lpig);
+
+ if (req->priv_data_present) {
+ desc.qw2 = req->priv_data[0];
+ desc.qw3 = req->priv_data[1];
+ } else {
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ }
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
- int head, tail, handled = 0;
- unsigned int flags = 0;
+ struct page_req_dsc *req;
+ int head, tail, handled;
+ u64 address;
- /* Clear PPR bit before reading head/tail registers, to
- * ensure that we get a new interrupt if needed. */
+ /*
+ * Clear PPR bit before reading head/tail registers, to ensure that
+ * we get a new interrupt if needed.
+ */
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
+ handled = (head != tail);
while (head != tail) {
- struct vm_area_struct *vma;
- struct page_req_dsc *req;
- struct qi_desc resp;
- int result;
- vm_fault_t ret;
- u64 address;
-
- handled = 1;
req = &iommu->prq[head / sizeof(*req)];
- result = QI_RESP_INVALID;
address = (u64)req->addr << VTD_PAGE_SHIFT;
- if (!req->pasid_present) {
- pr_err("%s: Page request without PASID: %08llx %08llx\n",
- iommu->name, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
+
+ if (unlikely(!req->pasid_present)) {
+ pr_err("IOMMU: %s: Page request without PASID\n",
+ iommu->name);
+bad_req:
+ svm = NULL;
+ sdev = NULL;
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ goto prq_advance;
}
- /* We shall not receive page request for supervisor SVM */
- if (req->pm_req && (req->rd_req | req->wr_req)) {
- pr_err("Unexpected page request in Privilege Mode");
- /* No need to find the matching sdev as for bad_req */
- goto no_pasid;
+
+ if (unlikely(!is_canonical_address(address))) {
+ pr_err("IOMMU: %s: Address is not canonical\n",
+ iommu->name);
+ goto bad_req;
}
- /* DMA read with exec requeset is not supported. */
- if (req->exe_req && req->rd_req) {
- pr_err("Execution request not supported\n");
- goto no_pasid;
+
+ if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
+ pr_err("IOMMU: %s: Page request in Privilege Mode\n",
+ iommu->name);
+ goto bad_req;
}
+
+ if (unlikely(req->exe_req && req->rd_req)) {
+ pr_err("IOMMU: %s: Execution request not supported\n",
+ iommu->name);
+ goto bad_req;
+ }
+
if (!svm || svm->pasid != req->pasid) {
- rcu_read_lock();
- svm = ioasid_find(NULL, req->pasid, NULL);
- /* It *can't* go away, because the driver is not permitted
+ /*
+ * It can't go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
- * So we only need RCU to protect the internal idr code. */
- rcu_read_unlock();
- if (IS_ERR_OR_NULL(svm)) {
- pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
- iommu->name, req->pasid, ((unsigned long long *)req)[0],
- ((unsigned long long *)req)[1]);
- goto no_pasid;
- }
+ */
+ svm = pasid_private_find(req->pasid);
+ if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
+ goto bad_req;
}
if (!sdev || sdev->sid != req->rid) {
- struct intel_svm_dev *t;
-
- sdev = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(t, &svm->devs, list) {
- if (t->sid == req->rid) {
- sdev = t;
- break;
- }
- }
- rcu_read_unlock();
+ sdev = svm_lookup_device_by_sid(svm, req->rid);
+ if (!sdev)
+ goto bad_req;
}
- /* Since we're using init_mm.pgd directly, we should never take
- * any faults on kernel addresses. */
- if (!svm->mm)
- goto bad_req;
-
- /* If address is not canonical, return invalid response */
- if (!is_canonical_address(address))
- goto bad_req;
+ sdev->prq_seq_number++;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (svm->flags & SVM_FLAG_GUEST_MODE) {
- if (sdev && !intel_svm_prq_report(sdev->dev, req))
- goto prq_advance;
- else
- goto bad_req;
- }
-
- /* If the mm is already defunct, don't handle faults. */
- if (!mmget_not_zero(svm->mm))
- goto bad_req;
-
- mmap_read_lock(svm->mm);
- vma = find_extend_vma(svm->mm, address);
- if (!vma || address < vma->vm_start)
- goto invalid;
-
- if (access_error(vma, req))
- goto invalid;
+ if (intel_svm_prq_report(iommu, sdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
- flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
- if (req->wr_req)
- flags |= FAULT_FLAG_WRITE;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
- if (ret & VM_FAULT_ERROR)
- goto invalid;
-
- result = QI_RESP_SUCCESS;
-invalid:
- mmap_read_unlock(svm->mm);
- mmput(svm->mm);
-bad_req:
- /* We get here in the error case where the PASID lookup failed,
- and these can be NULL. Do not use them below this point! */
- sdev = NULL;
- svm = NULL;
-no_pasid:
- if (req->lpig || req->priv_data_present) {
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- resp.qw0 = QI_PGRP_PASID(req->pasid) |
- QI_PGRP_DID(req->rid) |
- QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
- QI_PGRP_RESP_CODE(result) |
- QI_PGRP_RESP_TYPE;
- resp.qw1 = QI_PGRP_IDX(req->prg_index) |
- QI_PGRP_LPIG(req->lpig);
- resp.qw2 = 0;
- resp.qw3 = 0;
-
- if (req->priv_data_present)
- memcpy(&resp.qw2, req->priv_data,
- sizeof(req->priv_data));
- qi_submit_sync(iommu, &resp, 1, 0);
- }
+ trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ sdev->prq_seq_number);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@@ -1041,6 +1016,7 @@ prq_advance:
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
+ iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
@@ -1053,31 +1029,42 @@ prq_advance:
return IRQ_RETVAL(handled);
}
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
-struct iommu_sva *
-intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
- struct iommu_sva *sva = ERR_PTR(-EINVAL);
- struct intel_svm_dev *sdev = NULL;
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
unsigned int flags = 0;
+ struct iommu_sva *sva;
int ret;
- /*
- * TODO: Consolidate with generic iommu-sva bind after it is merged.
- * It will require shared SVM data structures, i.e. combine io_mm
- * and intel_svm etc.
- */
if (drvdata)
flags = *(unsigned int *)drvdata;
+
+ if (flags & SVM_FLAG_SUPERVISOR_MODE) {
+ if (!ecap_srs(iommu->ecap)) {
+ dev_err(dev, "%s: Supervisor PASID not supported\n",
+ iommu->name);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (mm) {
+ dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
+ iommu->name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mm = &init_mm;
+ }
+
mutex_lock(&pasid_mutex);
- ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
- if (ret)
- sva = ERR_PTR(ret);
- else if (sdev)
- sva = &sdev->sva;
- else
- WARN(!sdev, "SVM bind succeeded with no sdev!\n");
+ ret = intel_svm_alloc_pasid(dev, mm, flags);
+ if (ret) {
+ mutex_unlock(&pasid_mutex);
+ return ERR_PTR(ret);
+ }
+ sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+ if (IS_ERR_OR_NULL(sva))
+ intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;
@@ -1085,10 +1072,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
void intel_svm_unbind(struct iommu_sva *sva)
{
- struct intel_svm_dev *sdev;
+ struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
mutex_lock(&pasid_mutex);
- sdev = to_intel_svm_dev(sva);
intel_svm_unbind_mm(sdev->dev, sdev->pasid);
mutex_unlock(&pasid_mutex);
}
@@ -1194,9 +1180,14 @@ int intel_svm_page_response(struct device *dev,
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
- if (private_present)
- memcpy(&desc.qw2, prm->private_data,
- sizeof(prm->private_data));
+
+ if (private_present) {
+ desc.qw2 = prm->private_data[0];
+ desc.qw3 = prm->private_data[1];
+ } else if (prm->private_data[0]) {
+ dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
+ ktime_to_ns(ktime_get()) - prm->private_data[0]);
+ }
qi_submit_sync(iommu, &desc, 1, 0);
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 808ab70d5df5..5419c4b9f27a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3059,9 +3059,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
int ret, dev_def_dom;
struct device *dev;
- if (!group)
- return -EINVAL;
-
mutex_lock(&group->mutex);
if (group->default_domain != group->domain) {
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index b7ecd5b08039..b6cf5f16123b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -412,12 +412,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
return NULL;
}
-static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
+static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
assert_spin_locked(&iovad->iova_rbtree_lock);
__cached_rbnode_delete_update(iovad, iova);
rb_erase(&iova->node, &iovad->rbroot);
- free_iova_mem(iova);
}
/**
@@ -452,8 +451,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova)
unsigned long flags;
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
- private_free_iova(iovad, iova);
+ remove_iova(iovad, iova);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);
@@ -472,10 +472,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn)
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
iova = private_find_iova(iovad, pfn);
- if (iova)
- private_free_iova(iovad, iova);
+ if (!iova) {
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return;
+ }
+ remove_iova(iovad, iova);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
+ free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);
@@ -825,7 +828,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
if (WARN_ON(!iova))
continue;
- private_free_iova(iovad, iova);
+ remove_iova(iovad, iova);
+ free_iova_mem(iova);
}
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index aaa6a4d59057..51ea6f00db2f 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -19,7 +19,6 @@
#include <linux/iommu.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 7880f307cb2d..3a38352b603f 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -18,7 +18,6 @@
#include <linux/iommu.h>
#include <linux/clk.h>
#include <linux/err.h>
-#include <linux/of_iommu.h>
#include <asm/cacheflush.h>
#include <linux/sizes.h>
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index e06b8a0e2b56..6f7c69688ce2 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -19,7 +19,6 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
-#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 5915d7b38211..778e66f5f1aa 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -22,7 +22,6 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_address.h>
-#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index a9d2df001149..5696314ae69e 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -19,74 +19,6 @@
#define NO_IOMMU 1
-/**
- * of_get_dma_window - Parse *dma-window property and returns 0 if found.
- *
- * @dn: device node
- * @prefix: prefix for property name if any
- * @index: index to start to parse
- * @busno: Returns busno if supported. Otherwise pass NULL
- * @addr: Returns address that DMA starts
- * @size: Returns the range that DMA can handle
- *
- * This supports different formats flexibly. "prefix" can be
- * configured if any. "busno" and "index" are optionally
- * specified. Set 0(or NULL) if not used.
- */
-int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
- unsigned long *busno, dma_addr_t *addr, size_t *size)
-{
- const __be32 *dma_window, *end;
- int bytes, cur_index = 0;
- char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX];
-
- if (!dn || !addr || !size)
- return -EINVAL;
-
- if (!prefix)
- prefix = "";
-
- snprintf(propname, sizeof(propname), "%sdma-window", prefix);
- snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix);
- snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix);
-
- dma_window = of_get_property(dn, propname, &bytes);
- if (!dma_window)
- return -ENODEV;
- end = dma_window + bytes / sizeof(*dma_window);
-
- while (dma_window < end) {
- u32 cells;
- const void *prop;
-
- /* busno is one cell if supported */
- if (busno)
- *busno = be32_to_cpup(dma_window++);
-
- prop = of_get_property(dn, addrname, NULL);
- if (!prop)
- prop = of_get_property(dn, "#address-cells", NULL);
-
- cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn);
- if (!cells)
- return -EINVAL;
- *addr = of_read_number(dma_window, cells);
- dma_window += cells;
-
- prop = of_get_property(dn, sizename, NULL);
- cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn);
- if (!cells)
- return -EINVAL;
- *size = of_read_number(dma_window, cells);
- dma_window += cells;
-
- if (cur_index++ == index)
- break;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(of_get_dma_window);
-
static int of_iommu_xlate(struct device *dev,
struct of_phandle_args *iommu_spec)
{
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 26e517eb0dd3..91749654fd49 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -22,7 +22,6 @@
#include <linux/io.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
-#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/regmap.h>
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 7a2932772fdf..94b9d8e5b9a4 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -21,7 +21,6 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/of.h>
-#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -96,6 +95,15 @@ static const char * const rk_iommu_clocks[] = {
"aclk", "iface",
};
+struct rk_iommu_ops {
+ phys_addr_t (*pt_address)(u32 dte);
+ u32 (*mk_dtentries)(dma_addr_t pt_dma);
+ u32 (*mk_ptentries)(phys_addr_t page, int prot);
+ phys_addr_t (*dte_addr_phys)(u32 addr);
+ u32 (*dma_addr_dte)(dma_addr_t dt_dma);
+ u64 dma_bit_mask;
+};
+
struct rk_iommu {
struct device *dev;
void __iomem **bases;
@@ -116,6 +124,7 @@ struct rk_iommudata {
};
static struct device *dma_dev;
+static const struct rk_iommu_ops *rk_ops;
static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
unsigned int count)
@@ -179,6 +188,33 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
}
+/*
+ * In v2:
+ * 31:12 - PT address bit 31:0
+ * 11: 8 - PT address bit 35:32
+ * 7: 4 - PT address bit 39:36
+ * 3: 1 - Reserved
+ * 0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK_V2 GENMASK_ULL(31, 4)
+#define DTE_HI_MASK1 GENMASK(11, 8)
+#define DTE_HI_MASK2 GENMASK(7, 4)
+#define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */
+#define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */
+#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36)
+#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32)
+
+static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+{
+ u64 dte_v2 = dte;
+
+ dte_v2 = ((dte_v2 & DTE_HI_MASK2) << DTE_HI_SHIFT2) |
+ ((dte_v2 & DTE_HI_MASK1) << DTE_HI_SHIFT1) |
+ (dte_v2 & RK_DTE_PT_ADDRESS_MASK);
+
+ return (phys_addr_t)dte_v2;
+}
+
static inline bool rk_dte_is_pt_valid(u32 dte)
{
return dte & RK_DTE_PT_VALID;
@@ -189,6 +225,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
}
+static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma)
+{
+ pt_dma = (pt_dma & RK_DTE_PT_ADDRESS_MASK) |
+ ((pt_dma & PAGE_DESC_HI_MASK1) >> DTE_HI_SHIFT1) |
+ (pt_dma & PAGE_DESC_HI_MASK2) >> DTE_HI_SHIFT2;
+
+ return (pt_dma & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID;
+}
+
/*
* Each PTE has a Page address, some flags and a valid bit:
* +---------------------+---+-------+-+
@@ -215,11 +260,6 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
#define RK_PTE_PAGE_READABLE BIT(1)
#define RK_PTE_PAGE_VALID BIT(0)
-static inline phys_addr_t rk_pte_page_address(u32 pte)
-{
- return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
-}
-
static inline bool rk_pte_is_page_valid(u32 pte)
{
return pte & RK_PTE_PAGE_VALID;
@@ -235,6 +275,29 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
return page | flags | RK_PTE_PAGE_VALID;
}
+/*
+ * In v2:
+ * 31:12 - Page address bit 31:0
+ * 11:9 - Page address bit 34:32
+ * 8:4 - Page address bit 39:35
+ * 3 - Security
+ * 2 - Readable
+ * 1 - Writable
+ * 0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_READABLE_V2 BIT(2)
+#define RK_PTE_PAGE_WRITABLE_V2 BIT(1)
+
+static u32 rk_mk_pte_v2(phys_addr_t page, int prot)
+{
+ u32 flags = 0;
+
+ flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0;
+ flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0;
+
+ return rk_mk_dte_v2(page) | flags;
+}
+
static u32 rk_mk_pte_invalid(u32 pte)
{
return pte & ~RK_PTE_PAGE_VALID;
@@ -448,10 +511,10 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
* and verifying that upper 5 nybbles are read back.
*/
for (i = 0; i < iommu->num_mmu; i++) {
- rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
+ dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
+ rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
- dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
- if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
+ if (dte_addr != rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR)) {
dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
return -EFAULT;
}
@@ -470,6 +533,31 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
return 0;
}
+static inline phys_addr_t rk_dte_addr_phys(u32 addr)
+{
+ return (phys_addr_t)addr;
+}
+
+static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
+{
+ return dt_dma;
+}
+
+#define DT_HI_MASK GENMASK_ULL(39, 32)
+#define DT_SHIFT 28
+
+static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
+{
+ return (phys_addr_t)(addr & RK_DTE_PT_ADDRESS_MASK) |
+ ((addr & DT_HI_MASK) << DT_SHIFT);
+}
+
+static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
+{
+ return (dt_dma & RK_DTE_PT_ADDRESS_MASK) |
+ ((dt_dma & DT_HI_MASK) >> DT_SHIFT);
+}
+
static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
void __iomem *base = iommu->bases[index];
@@ -489,7 +577,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
page_offset = rk_iova_page_offset(iova);
mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
- mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
+ mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
dte_addr = phys_to_virt(dte_addr_phys);
@@ -498,14 +586,14 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
if (!rk_dte_is_pt_valid(dte))
goto print_it;
- pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
+ pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4);
pte_addr = phys_to_virt(pte_addr_phys);
pte = *pte_addr;
if (!rk_pte_is_page_valid(pte))
goto print_it;
- page_addr_phys = rk_pte_page_address(pte) + page_offset;
+ page_addr_phys = rk_ops->pt_address(pte) + page_offset;
page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
print_it:
@@ -601,13 +689,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
if (!rk_dte_is_pt_valid(dte))
goto out;
- pt_phys = rk_dte_pt_address(dte);
+ pt_phys = rk_ops->pt_address(dte);
page_table = (u32 *)phys_to_virt(pt_phys);
pte = page_table[rk_iova_pte_index(iova)];
if (!rk_pte_is_page_valid(pte))
goto out;
- phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
+ phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova);
out:
spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
@@ -679,14 +767,13 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
return ERR_PTR(-ENOMEM);
}
- dte = rk_mk_dte(pt_dma);
+ dte = rk_ops->mk_dtentries(pt_dma);
*dte_addr = dte;
- rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES);
rk_table_flush(rk_domain,
rk_domain->dt_dma + dte_index * sizeof(u32), 1);
done:
- pt_phys = rk_dte_pt_address(dte);
+ pt_phys = rk_ops->pt_address(dte);
return (u32 *)phys_to_virt(pt_phys);
}
@@ -728,7 +815,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
if (rk_pte_is_page_valid(pte))
goto unwind;
- pte_addr[pte_count] = rk_mk_pte(paddr, prot);
+ pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot);
paddr += SPAGE_SIZE;
}
@@ -750,7 +837,7 @@ unwind:
pte_count * SPAGE_SIZE);
iova += pte_count * SPAGE_SIZE;
- page_phys = rk_pte_page_address(pte_addr[pte_count]);
+ page_phys = rk_ops->pt_address(pte_addr[pte_count]);
pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
&iova, &page_phys, &paddr, prot);
@@ -785,7 +872,8 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
pte_index = rk_iova_pte_index(iova);
pte_addr = &page_table[pte_index];
- pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32);
+
+ pte_dma = rk_ops->pt_address(dte_index) + pte_index * sizeof(u32);
ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
paddr, size, prot);
@@ -821,7 +909,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
return 0;
}
- pt_phys = rk_dte_pt_address(dte);
+ pt_phys = rk_ops->pt_address(dte);
pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);
@@ -879,7 +967,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
for (i = 0; i < iommu->num_mmu; i++) {
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
- rk_domain->dt_dma);
+ rk_ops->dma_addr_dte(rk_domain->dt_dma));
rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
}
@@ -1004,8 +1092,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
goto err_free_dt;
}
- rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES);
-
spin_lock_init(&rk_domain->iommus_lock);
spin_lock_init(&rk_domain->dt_lock);
INIT_LIST_HEAD(&rk_domain->iommus);
@@ -1037,7 +1123,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
for (i = 0; i < NUM_DT_ENTRIES; i++) {
u32 dte = rk_domain->dt[i];
if (rk_dte_is_pt_valid(dte)) {
- phys_addr_t pt_phys = rk_dte_pt_address(dte);
+ phys_addr_t pt_phys = rk_ops->pt_address(dte);
u32 *page_table = phys_to_virt(pt_phys);
dma_unmap_single(dma_dev, pt_phys,
SPAGE_SIZE, DMA_TO_DEVICE);
@@ -1127,6 +1213,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct rk_iommu *iommu;
struct resource *res;
+ const struct rk_iommu_ops *ops;
int num_res = pdev->num_resources;
int err, i;
@@ -1138,6 +1225,17 @@ static int rk_iommu_probe(struct platform_device *pdev)
iommu->dev = dev;
iommu->num_mmu = 0;
+ ops = of_device_get_match_data(dev);
+ if (!rk_ops)
+ rk_ops = ops;
+
+ /*
+ * That should not happen unless different versions of the
+ * hardware block are embedded the same SoC
+ */
+ if (WARN_ON(rk_ops != ops))
+ return -EINVAL;
+
iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases),
GFP_KERNEL);
if (!iommu->bases)
@@ -1226,6 +1324,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
}
}
+ dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
+
return 0;
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
@@ -1277,8 +1377,31 @@ static const struct dev_pm_ops rk_iommu_pm_ops = {
pm_runtime_force_resume)
};
+static struct rk_iommu_ops iommu_data_ops_v1 = {
+ .pt_address = &rk_dte_pt_address,
+ .mk_dtentries = &rk_mk_dte,
+ .mk_ptentries = &rk_mk_pte,
+ .dte_addr_phys = &rk_dte_addr_phys,
+ .dma_addr_dte = &rk_dma_addr_dte,
+ .dma_bit_mask = DMA_BIT_MASK(32),
+};
+
+static struct rk_iommu_ops iommu_data_ops_v2 = {
+ .pt_address = &rk_dte_pt_address_v2,
+ .mk_dtentries = &rk_mk_dte_v2,
+ .mk_ptentries = &rk_mk_pte_v2,
+ .dte_addr_phys = &rk_dte_addr_phys_v2,
+ .dma_addr_dte = &rk_dma_addr_dte_v2,
+ .dma_bit_mask = DMA_BIT_MASK(40),
+};
+
static const struct of_device_id rk_iommu_dt_ids[] = {
- { .compatible = "rockchip,iommu" },
+ { .compatible = "rockchip,iommu",
+ .data = &iommu_data_ops_v1,
+ },
+ { .compatible = "rockchip,rk3568-iommu",
+ .data = &iommu_data_ops_v2,
+ },
{ /* sentinel */ }
};
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index c6e5ee4d9cef..6abdcab7273b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -10,11 +10,11 @@
#include <linux/amba/bus.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
+#include <linux/dma-map-ops.h>
#include <linux/freezer.h>
#include <linux/interval_tree.h>
#include <linux/iommu.h>
#include <linux/module.h>
-#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@@ -904,6 +904,15 @@ err_free_dev:
return ERR_PTR(ret);
}
+static void viommu_probe_finalize(struct device *dev)
+{
+#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
+ /* First clear the DMA ops in case we're switching from a DMA domain */
+ set_dma_ops(dev, NULL);
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
+#endif
+}
+
static void viommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -940,6 +949,7 @@ static struct iommu_ops viommu_ops = {
.iova_to_phys = viommu_iova_to_phys,
.iotlb_sync = viommu_iotlb_sync,
.probe_device = viommu_probe_device,
+ .probe_finalize = viommu_probe_finalize,
.release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 25d448f5af91..74afbb7a4f5e 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -17,7 +17,6 @@
#include <linux/slab.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
-#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index c6bd4f9a80ba..1ae993fee4a5 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -592,6 +592,9 @@ struct acpi_pci_root {
bool acpi_dma_supported(const struct acpi_device *adev);
enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
+int acpi_iommu_fwspec_init(struct device *dev, u32 id,
+ struct fwnode_handle *fwnode,
+ const struct iommu_ops *ops);
int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
u64 *size);
int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b338613fb536..6bb36fd6ba31 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -260,9 +260,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
#ifdef CONFIG_ARM64
void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
+void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size);
#else
static inline void
acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
+static inline void
+acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { }
#endif
int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 1a12baa58e40..f1f0842a2cb2 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -34,9 +34,8 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
void acpi_configure_pmsi_domain(struct device *dev);
int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
/* IOMMU interface */
-void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
-const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
- const u32 *id_in);
+int iort_dma_get_ranges(struct device *dev, u64 *size);
+int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
#else
@@ -48,11 +47,10 @@ static inline struct irq_domain *iort_get_device_domain(
{ return NULL; }
static inline void acpi_configure_pmsi_domain(struct device *dev) { }
/* IOMMU interface */
-static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
- u64 *size) { }
-static inline const struct iommu_ops *iort_iommu_configure_id(
- struct device *dev, const u32 *id_in)
-{ return NULL; }
+static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
+{ return -ENODEV; }
+static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
+{ return -ENODEV; }
static inline
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }
diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h
new file mode 100644
index 000000000000..1eb8ee5b0e5f
--- /dev/null
+++ b/include/linux/acpi_viot.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ACPI_VIOT_H__
+#define __ACPI_VIOT_H__
+
+#include <linux/acpi.h>
+
+#ifdef CONFIG_ACPI_VIOT
+void __init acpi_viot_init(void);
+int viot_iommu_configure(struct device *dev);
+#else
+static inline void acpi_viot_init(void) {}
+static inline int viot_iommu_configure(struct device *dev)
+{
+ return -ENODEV;
+}
+#endif
+
+#endif /* __ACPI_VIOT_H__ */
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 6e75a2d689b4..758ca4694257 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
void iommu_put_dma_cookie(struct iommu_domain *domain);
/* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size);
+void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
/* The DMA API isn't _quite_ the whole story, though... */
/*
@@ -50,7 +50,7 @@ struct msi_msg;
struct device;
static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
- u64 size)
+ u64 dma_limit)
{
}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 03faf20a6817..d0fa0b31994d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -537,7 +537,7 @@ struct context_entry {
struct dmar_domain {
int nid; /* node id */
- unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
+ unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED];
/* Refcount of devices per iommu */
@@ -546,7 +546,10 @@ struct dmar_domain {
* domain ids are 16 bit wide according
* to VT-d spec, section 9.3 */
- bool has_iotlb_device;
+ u8 has_iotlb_device: 1;
+ u8 iommu_coherency: 1; /* indicate coherency of iommu access */
+ u8 iommu_snooping: 1; /* indicate snooping control feature */
+
struct list_head devices; /* all devices' list */
struct list_head subdevices; /* all subdevices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
@@ -558,10 +561,6 @@ struct dmar_domain {
int agaw;
int flags; /* flags to find out type of domain */
-
- int iommu_coherency;/* indicate coherency of iommu access */
- int iommu_snooping; /* indicate snooping control feature*/
- int iommu_count; /* reference count of iommu */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
@@ -606,6 +605,8 @@ struct intel_iommu {
struct completion prq_complete;
struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
#endif
+ struct iopf_queue *iopf_queue;
+ unsigned char iopfq_name[16];
struct q_inval *qi; /* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -619,6 +620,7 @@ struct intel_iommu {
u32 flags; /* Software defined flags */
struct dmar_drhd_unit *drhd;
+ void *perf_statistic;
};
/* Per subdevice private data */
@@ -776,6 +778,7 @@ struct intel_svm_dev {
struct device *dev;
struct intel_iommu *iommu;
struct iommu_sva sva;
+ unsigned long prq_seq_number;
u32 pasid;
int users;
u16 did;
@@ -791,7 +794,6 @@ struct intel_svm {
u32 pasid;
int gpasid; /* In case that guest PASID is different from host PASID */
struct list_head devs;
- struct list_head list;
};
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
@@ -827,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
#define intel_iommu_enabled (0)
#endif
+static inline const char *decode_prq_descriptor(char *str, size_t size,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3)
+{
+ char *buf = str;
+ int bytes;
+
+ bytes = snprintf(buf, size,
+ "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx",
+ FIELD_GET(GENMASK_ULL(31, 16), dw0),
+ FIELD_GET(GENMASK_ULL(63, 12), dw1),
+ dw1 & BIT_ULL(0) ? 'r' : '-',
+ dw1 & BIT_ULL(1) ? 'w' : '-',
+ dw0 & BIT_ULL(52) ? 'x' : '-',
+ dw0 & BIT_ULL(53) ? 'p' : '-',
+ dw1 & BIT_ULL(2) ? 'l' : '-',
+ FIELD_GET(GENMASK_ULL(51, 32), dw0),
+ FIELD_GET(GENMASK_ULL(11, 3), dw1));
+
+ /* Private Data */
+ if (dw0 & BIT_ULL(9)) {
+ size -= bytes;
+ buf += bytes;
+ snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3);
+ }
+
+ return str;
+}
+
#endif
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 16f4b3e87f20..55c1eb300a86 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -2,29 +2,18 @@
#ifndef __OF_IOMMU_H
#define __OF_IOMMU_H
-#include <linux/device.h>
-#include <linux/iommu.h>
-#include <linux/of.h>
+struct device;
+struct device_node;
+struct iommu_ops;
#ifdef CONFIG_OF_IOMMU
-extern int of_get_dma_window(struct device_node *dn, const char *prefix,
- int index, unsigned long *busno, dma_addr_t *addr,
- size_t *size);
-
extern const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np,
const u32 *id);
#else
-static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
- int index, unsigned long *busno, dma_addr_t *addr,
- size_t *size)
-{
- return -EINVAL;
-}
-
static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np,
const u32 *id)
diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h
index d233f2916584..e5c1ca6d16ee 100644
--- a/include/trace/events/intel_iommu.h
+++ b/include/trace/events/intel_iommu.h
@@ -15,6 +15,8 @@
#include <linux/tracepoint.h>
#include <linux/intel-iommu.h>
+#define MSG_MAX 256
+
TRACE_EVENT(qi_submit,
TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3),
@@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit,
__entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3
)
);
+
+TRACE_EVENT(prq_report,
+ TP_PROTO(struct intel_iommu *iommu, struct device *dev,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3,
+ unsigned long seq),
+
+ TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq),
+
+ TP_STRUCT__entry(
+ __field(u64, dw0)
+ __field(u64, dw1)
+ __field(u64, dw2)
+ __field(u64, dw3)
+ __field(unsigned long, seq)
+ __string(iommu, iommu->name)
+ __string(dev, dev_name(dev))
+ __dynamic_array(char, buff, MSG_MAX)
+ ),
+
+ TP_fast_assign(
+ __entry->dw0 = dw0;
+ __entry->dw1 = dw1;
+ __entry->dw2 = dw2;
+ __entry->dw3 = dw3;
+ __entry->seq = seq;
+ __assign_str(iommu, iommu->name);
+ __assign_str(dev, dev_name(dev));
+ ),
+
+ TP_printk("%s/%s seq# %ld: %s",
+ __get_str(iommu), __get_str(dev), __entry->seq,
+ decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0,
+ __entry->dw1, __entry->dw2, __entry->dw3)
+ )
+);
#endif /* _TRACE_INTEL_IOMMU_H */
/* This part must be outside protection */