diff options
author | Joerg Roedel <jroedel@suse.de> | 2020-01-17 13:01:01 +0300 |
---|---|---|
committer | Joerg Roedel <jroedel@suse.de> | 2020-01-17 13:01:01 +0300 |
commit | 6855d1ba7537e21e8f36988bad36d1ad76841f1d (patch) | |
tree | eaffe991a923ec4c77cec99e46f71f7f7868814b | |
parent | 1ea27ee2f76e67f98b9942988f1336a70d351317 (diff) | |
parent | 5a4549fd790500d7db94b7d2af6d60cee42110c3 (diff) | |
download | linux-6855d1ba7537e21e8f36988bad36d1ad76841f1d.tar.xz |
Merge tag 'arm-smmu-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu
Arm SMMU updates for 5.6
- Support for building, and {un,}loading the SMMU drivers as modules
- Minor cleanups
- SMMUv3:
* Non-critical fix to encoding of TLBI_NH_VA invalidation command
* Fix broken sanity check on size of MMIO resource during probe
* Support for Substream IDs which will soon be provided by PCI PASIDs
- io-pgtable:
* Finish off the TTBR1 preparation work partially merged last cycle
* Ensure correct memory attributes for non-cacheable mappings
- SMMU:
* Namespace public #defines to avoid collisions with arch/arm64/
* Avoid using valid SMR register when probing mask size
-rw-r--r-- | Documentation/devicetree/bindings/iommu/iommu.txt | 6 | ||||
-rw-r--r-- | drivers/acpi/arm64/iort.c | 18 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-impl.c | 2 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 499 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 197 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.h | 228 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm-v7s.c | 22 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 164 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable.c | 2 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 2 | ||||
-rw-r--r-- | drivers/iommu/msm_iommu.c | 4 | ||||
-rw-r--r-- | drivers/iommu/mtk_iommu.c | 4 | ||||
-rw-r--r-- | drivers/iommu/of_iommu.c | 6 | ||||
-rw-r--r-- | drivers/iommu/qcom_iommu.c | 25 | ||||
-rw-r--r-- | include/linux/io-pgtable.h | 27 | ||||
-rw-r--r-- | include/linux/iommu.h | 13 | ||||
-rw-r--r-- | include/linux/pci-ats.h | 3 |
17 files changed, 810 insertions, 412 deletions
diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt index 5a8b4624defc..3c36334e4f94 100644 --- a/Documentation/devicetree/bindings/iommu/iommu.txt +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -86,6 +86,12 @@ have a means to turn off translation. But it is invalid in such cases to disable the IOMMU's device tree node in the first place because it would prevent any driver from properly setting up the translations. +Optional properties: +-------------------- +- pasid-num-bits: Some masters support multiple address spaces for DMA, by + tagging DMA transactions with an address space identifier. By default, + this is 0, which means that the device only has one address space. + Notes: ====== diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4a560fdf7386..9f0a9f9339f9 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "ACPI: IORT: " fmt #include <linux/acpi_iort.h> +#include <linux/bitfield.h> #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/list.h> @@ -924,6 +925,20 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) return iort_iommu_xlate(info->dev, parent, streamid); } +static void iort_named_component_init(struct device *dev, + struct acpi_iort_node *node) +{ + struct acpi_iort_named_component *nc; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (!fwspec) + return; + + nc = (struct acpi_iort_named_component *)node->node_data; + fwspec->num_pasid_bits = FIELD_GET(ACPI_IORT_NC_PASID_BITS, + nc->node_flags); +} + /** * iort_iommu_configure - Set-up IOMMU configuration for a device. * @@ -978,6 +993,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (parent) err = iort_iommu_xlate(dev, parent, streamid); } while (parent && !err); + + if (!err) + iort_named_component_init(dev, node); } /* diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index b2fe72a8f019..74d97a886e93 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -119,7 +119,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) * Secure has also cleared SACR.CACHE_LOCK for this to take effect... */ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7); - major = FIELD_GET(ID7_MAJOR, reg); + major = FIELD_GET(ARM_SMMU_ID7_MAJOR, reg); reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR); if (major >= 2) reg &= ~ARM_MMU500_ACR_CACHE_LOCK; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 03dc97842875..5e04c1f3992a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -223,9 +223,15 @@ #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 +#define STRTAB_STE_0_S1FMT_64K_L2 2 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) +#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0) +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0 +#define STRTAB_STE_1_S1DSS_BYPASS 0x1 +#define STRTAB_STE_1_S1DSS_SSID0 0x2 + #define STRTAB_STE_1_S1C_CACHE_NC 0UL #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL #define STRTAB_STE_1_S1C_CACHE_WT 2UL @@ -250,6 +256,13 @@ #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0) #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32) +#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0) +#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6) +#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8) +#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10) +#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12) +#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14) +#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16) #define STRTAB_STE_2_S2AA64 (1UL << 51) #define STRTAB_STE_2_S2ENDI (1UL << 52) #define STRTAB_STE_2_S2PTW (1UL << 54) @@ -257,30 +270,34 @@ #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) -/* Context descriptor (stage-1 only) */ +/* + * Context descriptors. + * + * Linear: when less than 1024 SSIDs are supported + * 2lvl: at most 1024 L1 entries, + * 1024 lazy entries per table. + */ +#define CTXDESC_SPLIT 10 +#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) + +#define CTXDESC_L1_DESC_DWORDS 1 +#define CTXDESC_L1_DESC_V (1UL << 0) +#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12) + #define CTXDESC_CD_DWORDS 8 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) -#define ARM64_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) -#define ARM64_TCR_TG0 GENMASK_ULL(15, 14) #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) -#define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8) #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) -#define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10) #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12) -#define ARM64_TCR_SH0 GENMASK_ULL(13, 12) #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14) -#define ARM64_TCR_EPD0 (1ULL << 7) #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30) -#define ARM64_TCR_EPD1 (1ULL << 23) #define CTXDESC_CD_0_ENDI (1UL << 15) #define CTXDESC_CD_0_V (1UL << 31) #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) -#define ARM64_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) -#define ARM64_TCR_TBI0 (1ULL << 37) #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) @@ -291,9 +308,11 @@ #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4) -/* Convert between AArch64 (CPU) TCR format and SMMU CD format */ -#define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \ - FIELD_GET(ARM64_TCR_##fld, tcr)) +/* + * When the SMMU only supports linear context descriptor tables, pick a + * reasonable size limit (64kB). + */ +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 @@ -322,6 +341,7 @@ #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0) #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12) +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12) #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32) #define CMDQ_CFGI_1_LEAF (1UL << 0) #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) @@ -435,8 +455,11 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_CFGI_STE 0x3 #define CMDQ_OP_CFGI_ALL 0x4 + #define CMDQ_OP_CFGI_CD 0x5 + #define CMDQ_OP_CFGI_CD_ALL 0x6 struct { u32 sid; + u32 ssid; union { bool leaf; u8 span; @@ -542,16 +565,30 @@ struct arm_smmu_strtab_l1_desc { dma_addr_t l2ptr_dma; }; +struct arm_smmu_ctx_desc { + u16 asid; + u64 ttbr; + u64 tcr; + u64 mair; +}; + +struct arm_smmu_l1_ctx_desc { + __le64 *l2ptr; + dma_addr_t l2ptr_dma; +}; + +struct arm_smmu_ctx_desc_cfg { + __le64 *cdtab; + dma_addr_t cdtab_dma; + struct arm_smmu_l1_ctx_desc *l1_desc; + unsigned int num_l1_ents; +}; + struct arm_smmu_s1_cfg { - __le64 *cdptr; - dma_addr_t cdptr_dma; - - struct arm_smmu_ctx_desc { - u16 asid; - u64 ttbr; - u64 tcr; - u64 mair; - } cd; + struct arm_smmu_ctx_desc_cfg cdcfg; + struct arm_smmu_ctx_desc cd; + u8 s1fmt; + u8 s1cdmax; }; struct arm_smmu_s2_cfg { @@ -633,6 +670,7 @@ struct arm_smmu_master { u32 *sids; unsigned int num_sids; bool ats_enabled; + unsigned int ssid_bits; }; /* SMMU private data for an IOMMU domain */ @@ -842,15 +880,22 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; break; + case CMDQ_OP_CFGI_CD: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); + /* Fallthrough */ case CMDQ_OP_CFGI_STE: cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); break; + case CMDQ_OP_CFGI_CD_ALL: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); + break; case CMDQ_OP_CFGI_ALL: /* Cover the entire SID range */ cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; @@ -1438,50 +1483,238 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) } /* Context descriptor manipulation functions */ -static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, + int ssid, bool leaf) { - u64 val = 0; + size_t i; + unsigned long flags; + struct arm_smmu_master *master; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_CFGI_CD, + .cfgi = { + .ssid = ssid, + .leaf = leaf, + }, + }; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + for (i = 0; i < master->num_sids; i++) { + cmd.cfgi.sid = master->sids[i]; + arm_smmu_cmdq_issue_cmd(smmu, &cmd); + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - /* Repack the TCR. Just care about TTBR0 for now */ - val |= ARM_SMMU_TCR2CD(tcr, T0SZ); - val |= ARM_SMMU_TCR2CD(tcr, TG0); - val |= ARM_SMMU_TCR2CD(tcr, IRGN0); - val |= ARM_SMMU_TCR2CD(tcr, ORGN0); - val |= ARM_SMMU_TCR2CD(tcr, SH0); - val |= ARM_SMMU_TCR2CD(tcr, EPD0); - val |= ARM_SMMU_TCR2CD(tcr, EPD1); - val |= ARM_SMMU_TCR2CD(tcr, IPS); + arm_smmu_cmdq_issue_sync(smmu); +} - return val; +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, + struct arm_smmu_l1_ctx_desc *l1_desc) +{ + size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + + l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, + &l1_desc->l2ptr_dma, GFP_KERNEL); + if (!l1_desc->l2ptr) { + dev_warn(smmu->dev, + "failed to allocate context descriptor table\n"); + return -ENOMEM; + } + return 0; } -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, - struct arm_smmu_s1_cfg *cfg) +static void arm_smmu_write_cd_l1_desc(__le64 *dst, + struct arm_smmu_l1_ctx_desc *l1_desc) { - u64 val; + u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | + CTXDESC_L1_DESC_V; + + WRITE_ONCE(*dst, cpu_to_le64(val)); +} + +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, + u32 ssid) +{ + __le64 *l1ptr; + unsigned int idx; + struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + + if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + + idx = ssid >> CTXDESC_SPLIT; + l1_desc = &cdcfg->l1_desc[idx]; + if (!l1_desc->l2ptr) { + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + return NULL; + + l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + /* An invalid L1CD can be cached */ + arm_smmu_sync_cd(smmu_domain, ssid, false); + } + idx = ssid & (CTXDESC_L2_ENTRIES - 1); + return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; +} +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, + int ssid, struct arm_smmu_ctx_desc *cd) +{ /* - * We don't need to issue any invalidation here, as we'll invalidate - * the STE when installing the new entry anyway. + * This function handles the following cases: + * + * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (2) Install a secondary CD, for SID+SSID traffic. + * (3) Update ASID of a CD. Atomically write the first 64 bits of the + * CD, then invalidate the old entry and mappings. + * (4) Remove a secondary CD. */ - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | + u64 val; + bool cd_live; + __le64 *cdptr; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + return -E2BIG; + + cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + if (!cdptr) + return -ENOMEM; + + val = le64_to_cpu(cdptr[0]); + cd_live = !!(val & CTXDESC_CD_0_V); + + if (!cd) { /* (4) */ + val = 0; + } else if (cd_live) { /* (3) */ + val &= ~CTXDESC_CD_0_ASID; + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); + /* + * Until CD+TLB invalidation, both ASIDs may be used for tagging + * this substream's traffic + */ + } else { /* (1) and (2) */ + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); + cdptr[2] = 0; + cdptr[3] = cpu_to_le64(cd->mair); + + /* + * STE is live, and the SMMU might read dwords of this CD in any + * order. Ensure that it observes valid values before reading + * V=1. + */ + arm_smmu_sync_cd(smmu_domain, ssid, true); + + val = cd->tcr | #ifdef __BIG_ENDIAN - CTXDESC_CD_0_ENDI | + CTXDESC_CD_0_ENDI | #endif - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | - CTXDESC_CD_0_V; + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | + CTXDESC_CD_0_AA64 | + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | + CTXDESC_CD_0_V; + + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + val |= CTXDESC_CD_0_S; + } + + /* + * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3 + * "Configuration structures and configuration invalidation completion" + * + * The size of single-copy atomic reads made by the SMMU is + * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single + * field within an aligned 64-bit span of a structure can be altered + * without first making the structure invalid. + */ + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); + arm_smmu_sync_cd(smmu_domain, ssid, true); + return 0; +} + +static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + int ret; + size_t l1size; + size_t max_contexts; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) - val |= CTXDESC_CD_0_S; + max_contexts = 1 << cfg->s1cdmax; - cfg->cdptr[0] = cpu_to_le64(val); + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || + max_contexts <= CTXDESC_L2_ENTRIES) { + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cdcfg->num_l1_ents = max_contexts; - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; - cfg->cdptr[1] = cpu_to_le64(val); + l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); + } else { + cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + CTXDESC_L2_ENTRIES); + + cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, + sizeof(*cdcfg->l1_desc), + GFP_KERNEL); + if (!cdcfg->l1_desc) + return -ENOMEM; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } + + cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + GFP_KERNEL); + if (!cdcfg->cdtab) { + dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + ret = -ENOMEM; + goto err_free_l1; + } + + return 0; + +err_free_l1: + if (cdcfg->l1_desc) { + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + } + return ret; +} + +static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + int i; + size_t size, l1size; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + + if (cdcfg->l1_desc) { + size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + + for (i = 0; i < cdcfg->num_l1_ents; i++) { + if (!cdcfg->l1_desc[i].l2ptr) + continue; + + dmam_free_coherent(smmu->dev, size, + cdcfg->l1_desc[i].l2ptr, + cdcfg->l1_desc[i].l2ptr_dma); + } + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } else { + l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + } - cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair); + dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); + cdcfg->cdtab_dma = 0; + cdcfg->cdtab = NULL; } /* Stream table manipulation functions */ @@ -1603,6 +1836,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (s1_cfg) { BUG_ON(ste_live); dst[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1612,8 +1846,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); + val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | + FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); } if (s2_cfg) { @@ -1637,7 +1873,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ @@ -1670,7 +1907,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) desc->span = STRTAB_SPLIT + 1; desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -2126,12 +2363,8 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - if (cfg->cdptr) { - dmam_free_coherent(smmu_domain->smmu->dev, - CTXDESC_CD_DWORDS << 3, - cfg->cdptr, - cfg->cdptr_dma); - + if (cfg->cdcfg.cdtab) { + arm_smmu_free_cd_tables(smmu_domain); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } } else { @@ -2144,55 +2377,82 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits); if (asid < 0) return asid; - cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, - GFP_KERNEL | __GFP_ZERO); - if (!cfg->cdptr) { - dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - ret = -ENOMEM; + cfg->s1cdmax = master->ssid_bits; + + ret = arm_smmu_alloc_cd_tables(smmu_domain); + if (ret) goto out_free_asid; - } cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | + CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; + + /* + * Note that this will end up calling arm_smmu_sync_cd() before + * the master has been added to the devices list for this domain. + * This isn't an issue because the STE hasn't been installed yet. + */ + ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + if (ret) + goto out_free_cd_tables; + return 0; +out_free_cd_tables: + arm_smmu_free_cd_tables(smmu_domain); out_free_asid: arm_smmu_bitmap_free(smmu->asid_map, asid); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; + typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); if (vmid < 0) return vmid; + vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain) +static int arm_smmu_domain_finalise(struct iommu_domain *domain, + struct arm_smmu_master *master) { int ret; unsigned long ias, oas; @@ -2200,6 +2460,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, + struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2254,7 +2515,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2407,7 +2668,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain); + ret = arm_smmu_domain_finalise(domain, master); if (ret) { smmu_domain->smmu = NULL; goto out_unlock; @@ -2419,6 +2680,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_name(smmu->dev)); ret = -ENXIO; goto out_unlock; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && + master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + dev_err(dev, + "cannot attach to incompatible domain (%u SSID bits != %u)\n", + smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); + ret = -EINVAL; + goto out_unlock; } master->domain = smmu_domain; @@ -2426,9 +2694,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) - arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); - arm_smmu_install_ste_for_dev(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); @@ -2529,51 +2794,66 @@ static int arm_smmu_add_device(struct device *dev) if (!fwspec || fwspec->ops != &arm_smmu_ops) return -ENODEV; - /* - * We _can_ actually withstand dodgy bus code re-calling add_device() - * without an intervening remove_device()/of_xlate() sequence, but - * we're not going to do so quietly... - */ - if (WARN_ON_ONCE(fwspec->iommu_priv)) { - master = fwspec->iommu_priv; - smmu = master->smmu; - } else { - smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - if (!smmu) - return -ENODEV; - master = kzalloc(sizeof(*master), GFP_KERNEL); - if (!master) - return -ENOMEM; - master->dev = dev; - master->smmu = smmu; - master->sids = fwspec->ids; - master->num_sids = fwspec->num_ids; - fwspec->iommu_priv = master; - } + if (WARN_ON_ONCE(fwspec->iommu_priv)) + return -EBUSY; + + smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); + if (!smmu) + return -ENODEV; + + master = kzalloc(sizeof(*master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + master->dev = dev; + master->smmu = smmu; + master->sids = fwspec->ids; + master->num_sids = fwspec->num_ids; + fwspec->iommu_priv = master; /* Check the SIDs are in range of the SMMU and our stream table */ for (i = 0; i < master->num_sids; i++) { u32 sid = master->sids[i]; - if (!arm_smmu_sid_in_range(smmu, sid)) - return -ERANGE; + if (!arm_smmu_sid_in_range(smmu, sid)) { + ret = -ERANGE; + goto err_free_master; + } /* Ensure l2 strtab is initialised */ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_l2_strtab(smmu, sid); if (ret) - return ret; + goto err_free_master; } } + master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); + + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) + master->ssid_bits = min_t(u8, master->ssid_bits, + CTXDESC_LINEAR_CDMAX); + + ret = iommu_device_link(&smmu->iommu, dev); + if (ret) + goto err_free_master; + group = iommu_group_get_for_dev(dev); - if (!IS_ERR(group)) { - iommu_group_put(group); - iommu_device_link(&smmu->iommu, dev); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto err_unlink; } - return PTR_ERR_OR_ZERO(group); + iommu_group_put(group); + return 0; + +err_unlink: + iommu_device_unlink(&smmu->iommu, dev); +err_free_master: + kfree(master); + fwspec->iommu_priv = NULL; + return ret; } static void arm_smmu_remove_device(struct device *dev) @@ -2733,7 +3013,6 @@ static struct iommu_ops arm_smmu_ops = { .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ - .owner = THIS_MODULE, }; /* Probing and initialisation functions */ @@ -2879,7 +3158,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", @@ -2906,7 +3185,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", @@ -3632,7 +3911,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { + if (resource_size(res) < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } @@ -3722,7 +4001,7 @@ MODULE_DEVICE_TABLE(of, arm_smmu_of_match); static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu-v3", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .of_match_table = arm_smmu_of_match, .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5ef1f2e100d7..cca94c30b301 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -271,7 +271,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { reg = arm_smmu_readl(smmu, page, status); - if (!(reg & sTLBGSTATUS_GSACTIVE)) + if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE)) return; cpu_relax(); } @@ -478,7 +478,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int idx = smmu_domain->cfg.cbndx; fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -510,7 +510,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) if (__ratelimit(&rs)) { if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && - (gfsr & sGFSR_USF)) + (gfsr & ARM_SMMU_sGFSR_USF)) dev_err(smmu->dev, "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", (u16)gfsynr1); @@ -540,26 +540,28 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr; } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM); + cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg); + cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg); if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - cb->tcr[1] |= TCR2_AS; + cb->tcr[1] |= ARM_SMMU_TCR2_AS; + else + cb->tcr[0] |= ARM_SMMU_TCR_EAE; } } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg); } /* TTBRs */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; + cb->ttbr[1] = 0; } else { - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; @@ -595,31 +597,33 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) /* CBA2R */ if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg = CBA2R_VA64; + reg = ARM_SMMU_CBA2R_VA64; else reg = 0; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg); } /* CBAR */ - reg = FIELD_PREP(CBAR_TYPE, cfg->cbar); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar); if (smmu->version < ARM_SMMU_V2) - reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx); + reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx); /* * Use the weakest shareability/memory types, so they are * overridden by the ttbcr/pte. */ if (stage1) { - reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) | - FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB); + reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG, + ARM_SMMU_CBAR_S1_BPSHCFG_NSH) | + FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR, + ARM_SMMU_CBAR_S1_MEMATTR_WB); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= FIELD_PREP(CBAR_VMID, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid); } arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg); @@ -651,11 +655,12 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) } /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE | + ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M; if (stage1) - reg |= SCTLR_S1_ASIDPNE; + reg |= ARM_SMMU_SCTLR_S1_ASIDPNE; if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg); } @@ -837,7 +842,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); - cfg->irptndx = INVALID_IRPTNDX; + cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX; } mutex_unlock(&smmu_domain->init_mutex); @@ -875,7 +880,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) smmu->cbs[cfg->cbndx].cfg = NULL; arm_smmu_write_context_bank(smmu, cfg->cbndx); - if (cfg->irptndx != INVALID_IRPTNDX) { + if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; devm_free_irq(smmu->dev, irq, domain); } @@ -931,23 +936,24 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_smr *smr = smmu->smrs + idx; - u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask); + u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) | + FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask); if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid) - reg |= SMR_VALID; + reg |= ARM_SMMU_SMR_VALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg); } static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; - u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) | - FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) | - FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg); + u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs && smmu->smrs[idx].valid) - reg |= S2CR_EXIDVALID; + reg |= ARM_SMMU_S2CR_EXIDVALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg); } @@ -965,24 +971,37 @@ static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx) static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu) { u32 smr; + int i; if (!smmu->smrs) return; - + /* + * If we've had to accommodate firmware memory regions, we may + * have live SMRs by now; tread carefully... + * + * Somewhat perversely, not having a free SMR for this test implies we + * can get away without it anyway, as we'll only be able to 'allocate' + * these SMRs for the ID/mask values we're already trusting to be OK. + */ + for (i = 0; i < smmu->num_mapping_groups; i++) + if (!smmu->smrs[i].valid) + goto smr_ok; + return; +smr_ok: /* * SMR.ID bits may not be preserved if the corresponding MASK * bits are set, so check each one separately. We can reject * masters later if they try to claim IDs outside these masks. */ - smr = FIELD_PREP(SMR_ID, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->streamid_mask = FIELD_GET(SMR_ID, smr); + smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); + smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr); - smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr); + smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); + smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); } static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask) @@ -1051,8 +1070,8 @@ static int arm_smmu_master_alloc_smes(struct device *dev) mutex_lock(&smmu->stream_map_mutex); /* Figure out a viable stream map entry allocation */ for_each_cfg_sme(fwspec, i, idx) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (idx != INVALID_SMENDX) { ret = -EEXIST; @@ -1296,7 +1315,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", @@ -1306,7 +1326,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR); spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); - if (phys & CB_PAR_F) { + if (phys & ARM_SMMU_CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); return 0; @@ -1387,8 +1407,8 @@ static int arm_smmu_add_device(struct device *dev) ret = -EINVAL; for (i = 0; i < fwspec->num_ids; i++) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (sid & ~smmu->streamid_mask) { dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n", @@ -1569,12 +1589,12 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) u32 mask, fwid = 0; if (args->args_count > 0) - fwid |= FIELD_PREP(SMR_ID, args->args[0]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]); if (args->args_count > 1) - fwid |= FIELD_PREP(SMR_MASK, args->args[1]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]); else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) - fwid |= FIELD_PREP(SMR_MASK, mask); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask); return iommu_fwspec_add_ids(dev, &fwid, 1); } @@ -1623,7 +1643,6 @@ static struct iommu_ops arm_smmu_ops = { .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ - .owner = THIS_MODULE, }; static void arm_smmu_device_reset(struct arm_smmu_device *smmu) @@ -1645,7 +1664,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); } /* Invalidate the TLB, just in case */ @@ -1655,29 +1674,30 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0); /* Enable fault reporting */ - reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); + reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE | + ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE); /* Disable TLB broadcasting. */ - reg |= (sCR0_VMIDPNE | sCR0_PTM); + reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM); /* Enable client access, handling unmatched streams as appropriate */ - reg &= ~sCR0_CLIENTPD; + reg &= ~ARM_SMMU_sCR0_CLIENTPD; if (disable_bypass) - reg |= sCR0_USFCFG; + reg |= ARM_SMMU_sCR0_USFCFG; else - reg &= ~sCR0_USFCFG; + reg &= ~ARM_SMMU_sCR0_USFCFG; /* Disable forced broadcasting */ - reg &= ~sCR0_FB; + reg &= ~ARM_SMMU_sCR0_FB; /* Don't upgrade barriers */ - reg &= ~(sCR0_BSU); + reg &= ~(ARM_SMMU_sCR0_BSU); if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= sCR0_VMID16EN; + reg |= ARM_SMMU_sCR0_VMID16EN; if (smmu->features & ARM_SMMU_FEAT_EXIDS) - reg |= sCR0_EXIDENABLE; + reg |= ARM_SMMU_sCR0_EXIDENABLE; if (smmu->impl && smmu->impl->reset) smmu->impl->reset(smmu); @@ -1722,21 +1742,21 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Restrict available stages based on module parameter */ if (force_stage == 1) - id &= ~(ID0_S2TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS); else if (force_stage == 2) - id &= ~(ID0_S1TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS); - if (id & ID0_S1TS) { + if (id & ARM_SMMU_ID0_S1TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S1; dev_notice(smmu->dev, "\tstage 1 translation\n"); } - if (id & ID0_S2TS) { + if (id & ARM_SMMU_ID0_S2TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S2; dev_notice(smmu->dev, "\tstage 2 translation\n"); } - if (id & ID0_NTS) { + if (id & ARM_SMMU_ID0_NTS) { smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED; dev_notice(smmu->dev, "\tnested translation\n"); } @@ -1747,8 +1767,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && - ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) { + if ((id & ARM_SMMU_ID0_S1TS) && + ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } @@ -1759,7 +1779,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) * Fortunately, this also opens up a workaround for systems where the * ID register value has ended up configured incorrectly. */ - cttw_reg = !!(id & ID0_CTTW); + cttw_reg = !!(id & ARM_SMMU_ID0_CTTW); if (cttw_fw || cttw_reg) dev_notice(smmu->dev, "\t%scoherent table walk\n", cttw_fw ? "" : "non-"); @@ -1768,16 +1788,16 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) "\t(IDR0.CTTW overridden by FW configuration)\n"); /* Max. number of entries we have for stream matching/indexing */ - if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) { + if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) { smmu->features |= ARM_SMMU_FEAT_EXIDS; size = 1 << 16; } else { - size = 1 << FIELD_GET(ID0_NUMSIDB, id); + size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id); } smmu->streamid_mask = size - 1; - if (id & ID0_SMS) { + if (id & ARM_SMMU_ID0_SMS) { smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH; - size = FIELD_GET(ID0_NUMSMRG, id); + size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id); if (size == 0) { dev_err(smmu->dev, "stream-matching supported, but no SMRs present!\n"); @@ -1805,18 +1825,19 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) mutex_init(&smmu->stream_map_mutex); spin_lock_init(&smmu->global_sync_lock); - if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) { + if (smmu->version < ARM_SMMU_V2 || + !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) { smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L; - if (!(id & ID0_PTFS_NO_AARCH32S)) + if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S)) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S; } /* ID1 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1); - smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; + smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12; /* Check for size mismatch of SMMU address space from mapped region */ - size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1); + size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1); if (smmu->numpage != 2 * size << smmu->pgshift) dev_warn(smmu->dev, "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n", @@ -1824,8 +1845,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */ smmu->numpage = size; - smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id); - smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id); + smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id); + smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id); if (smmu->num_s2_context_banks > smmu->num_context_banks) { dev_err(smmu->dev, "impossible number of S2 context banks!\n"); return -ENODEV; @@ -1839,14 +1860,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID2 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2); - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id)); smmu->ipa_size = size; /* The output mask is also applied for bypass */ - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id)); smmu->pa_size = size; - if (id & ID2_VMID16) + if (id & ARM_SMMU_ID2_VMID16) smmu->features |= ARM_SMMU_FEAT_VMID16; /* @@ -1863,13 +1884,13 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->version == ARM_SMMU_V1_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } else { - size = FIELD_GET(ID2_UBS, id); + size = FIELD_GET(ARM_SMMU_ID2_UBS, id); smmu->va_size = arm_smmu_id_size_to_bits(size); - if (id & ID2_PTFS_4K) + if (id & ARM_SMMU_ID2_PTFS_4K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K; - if (id & ID2_PTFS_16K) + if (id & ARM_SMMU_ID2_PTFS_16K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K; - if (id & ID2_PTFS_64K) + if (id & ARM_SMMU_ID2_PTFS_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } @@ -2246,7 +2267,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) arm_smmu_rpm_get(smmu); /* Turn the thing off */ - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); arm_smmu_rpm_put(smmu); if (pm_runtime_enabled(smmu->dev)) @@ -2311,7 +2332,7 @@ static const struct dev_pm_ops arm_smmu_pm_ops = { static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .of_match_table = arm_smmu_of_match, .pm = &arm_smmu_pm_ops, .suppress_bind_attrs = true, }, diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 62b9f0cec49b..8d1cd54d82a6 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -11,6 +11,7 @@ #define _ARM_SMMU_H #include <linux/atomic.h> +#include <linux/bitfield.h> #include <linux/bits.h> #include <linux/clk.h> #include <linux/device.h> @@ -23,51 +24,51 @@ /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 -#define sCR0_VMID16EN BIT(31) -#define sCR0_BSU GENMASK(15, 14) -#define sCR0_FB BIT(13) -#define sCR0_PTM BIT(12) -#define sCR0_VMIDPNE BIT(11) -#define sCR0_USFCFG BIT(10) -#define sCR0_GCFGFIE BIT(5) -#define sCR0_GCFGFRE BIT(4) -#define sCR0_EXIDENABLE BIT(3) -#define sCR0_GFIE BIT(2) -#define sCR0_GFRE BIT(1) -#define sCR0_CLIENTPD BIT(0) +#define ARM_SMMU_sCR0_VMID16EN BIT(31) +#define ARM_SMMU_sCR0_BSU GENMASK(15, 14) +#define ARM_SMMU_sCR0_FB BIT(13) +#define ARM_SMMU_sCR0_PTM BIT(12) +#define ARM_SMMU_sCR0_VMIDPNE BIT(11) +#define ARM_SMMU_sCR0_USFCFG BIT(10) +#define ARM_SMMU_sCR0_GCFGFIE BIT(5) +#define ARM_SMMU_sCR0_GCFGFRE BIT(4) +#define ARM_SMMU_sCR0_EXIDENABLE BIT(3) +#define ARM_SMMU_sCR0_GFIE BIT(2) +#define ARM_SMMU_sCR0_GFRE BIT(1) +#define ARM_SMMU_sCR0_CLIENTPD BIT(0) /* Auxiliary Configuration register */ #define ARM_SMMU_GR0_sACR 0x10 /* Identification registers */ #define ARM_SMMU_GR0_ID0 0x20 -#define ID0_S1TS BIT(30) -#define ID0_S2TS BIT(29) -#define ID0_NTS BIT(28) -#define ID0_SMS BIT(27) -#define ID0_ATOSNS BIT(26) -#define ID0_PTFS_NO_AARCH32 BIT(25) -#define ID0_PTFS_NO_AARCH32S BIT(24) -#define ID0_NUMIRPT GENMASK(23, 16) -#define ID0_CTTW BIT(14) -#define ID0_NUMSIDB GENMASK(12, 9) -#define ID0_EXIDS BIT(8) -#define ID0_NUMSMRG GENMASK(7, 0) +#define ARM_SMMU_ID0_S1TS BIT(30) +#define ARM_SMMU_ID0_S2TS BIT(29) +#define ARM_SMMU_ID0_NTS BIT(28) +#define ARM_SMMU_ID0_SMS BIT(27) +#define ARM_SMMU_ID0_ATOSNS BIT(26) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32 BIT(25) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32S BIT(24) +#define ARM_SMMU_ID0_NUMIRPT GENMASK(23, 16) +#define ARM_SMMU_ID0_CTTW BIT(14) +#define ARM_SMMU_ID0_NUMSIDB GENMASK(12, 9) +#define ARM_SMMU_ID0_EXIDS BIT(8) +#define ARM_SMMU_ID0_NUMSMRG GENMASK(7, 0) #define ARM_SMMU_GR0_ID1 0x24 -#define ID1_PAGESIZE BIT(31) -#define ID1_NUMPAGENDXB GENMASK(30, 28) -#define ID1_NUMS2CB GENMASK(23, 16) -#define ID1_NUMCB GENMASK(7, 0) +#define ARM_SMMU_ID1_PAGESIZE BIT(31) +#define ARM_SMMU_ID1_NUMPAGENDXB GENMASK(30, 28) +#define ARM_SMMU_ID1_NUMS2CB GENMASK(23, 16) +#define ARM_SMMU_ID1_NUMCB GENMASK(7, 0) #define ARM_SMMU_GR0_ID2 0x28 -#define ID2_VMID16 BIT(15) -#define ID2_PTFS_64K BIT(14) -#define ID2_PTFS_16K BIT(13) -#define ID2_PTFS_4K BIT(12) -#define ID2_UBS GENMASK(11, 8) -#define ID2_OAS GENMASK(7, 4) -#define ID2_IAS GENMASK(3, 0) +#define ARM_SMMU_ID2_VMID16 BIT(15) +#define ARM_SMMU_ID2_PTFS_64K BIT(14) +#define ARM_SMMU_ID2_PTFS_16K BIT(13) +#define ARM_SMMU_ID2_PTFS_4K BIT(12) +#define ARM_SMMU_ID2_UBS GENMASK(11, 8) +#define ARM_SMMU_ID2_OAS GENMASK(7, 4) +#define ARM_SMMU_ID2_IAS GENMASK(3, 0) #define ARM_SMMU_GR0_ID3 0x2c #define ARM_SMMU_GR0_ID4 0x30 @@ -75,11 +76,11 @@ #define ARM_SMMU_GR0_ID6 0x38 #define ARM_SMMU_GR0_ID7 0x3c -#define ID7_MAJOR GENMASK(7, 4) -#define ID7_MINOR GENMASK(3, 0) +#define ARM_SMMU_ID7_MAJOR GENMASK(7, 4) +#define ARM_SMMU_ID7_MINOR GENMASK(3, 0) #define ARM_SMMU_GR0_sGFSR 0x48 -#define sGFSR_USF BIT(1) +#define ARM_SMMU_sGFSR_USF BIT(1) #define ARM_SMMU_GR0_sGFSYNR0 0x50 #define ARM_SMMU_GR0_sGFSYNR1 0x54 @@ -92,106 +93,132 @@ #define ARM_SMMU_GR0_sTLBGSYNC 0x70 #define ARM_SMMU_GR0_sTLBGSTATUS 0x74 -#define sTLBGSTATUS_GSACTIVE BIT(0) +#define ARM_SMMU_sTLBGSTATUS_GSACTIVE BIT(0) /* Stream mapping registers */ #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) -#define SMR_VALID BIT(31) -#define SMR_MASK GENMASK(31, 16) -#define SMR_ID GENMASK(15, 0) +#define ARM_SMMU_SMR_VALID BIT(31) +#define ARM_SMMU_SMR_MASK GENMASK(31, 16) +#define ARM_SMMU_SMR_ID GENMASK(15, 0) #define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) -#define S2CR_PRIVCFG GENMASK(25, 24) +#define ARM_SMMU_S2CR_PRIVCFG GENMASK(25, 24) enum arm_smmu_s2cr_privcfg { S2CR_PRIVCFG_DEFAULT, S2CR_PRIVCFG_DIPAN, S2CR_PRIVCFG_UNPRIV, S2CR_PRIVCFG_PRIV, }; -#define S2CR_TYPE GENMASK(17, 16) +#define ARM_SMMU_S2CR_TYPE GENMASK(17, 16) enum arm_smmu_s2cr_type { S2CR_TYPE_TRANS, S2CR_TYPE_BYPASS, S2CR_TYPE_FAULT, }; -#define S2CR_EXIDVALID BIT(10) -#define S2CR_CBNDX GENMASK(7, 0) +#define ARM_SMMU_S2CR_EXIDVALID BIT(10) +#define ARM_SMMU_S2CR_CBNDX GENMASK(7, 0) /* Context bank attribute registers */ #define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) -#define CBAR_IRPTNDX GENMASK(31, 24) -#define CBAR_TYPE GENMASK(17, 16) +#define ARM_SMMU_CBAR_IRPTNDX GENMASK(31, 24) +#define ARM_SMMU_CBAR_TYPE GENMASK(17, 16) enum arm_smmu_cbar_type { CBAR_TYPE_S2_TRANS, CBAR_TYPE_S1_TRANS_S2_BYPASS, CBAR_TYPE_S1_TRANS_S2_FAULT, CBAR_TYPE_S1_TRANS_S2_TRANS, }; -#define CBAR_S1_MEMATTR GENMASK(15, 12) -#define CBAR_S1_MEMATTR_WB 0xf -#define CBAR_S1_BPSHCFG GENMASK(9, 8) -#define CBAR_S1_BPSHCFG_NSH 3 -#define CBAR_VMID GENMASK(7, 0) +#define ARM_SMMU_CBAR_S1_MEMATTR GENMASK(15, 12) +#define ARM_SMMU_CBAR_S1_MEMATTR_WB 0xf +#define ARM_SMMU_CBAR_S1_BPSHCFG GENMASK(9, 8) +#define ARM_SMMU_CBAR_S1_BPSHCFG_NSH 3 +#define ARM_SMMU_CBAR_VMID GENMASK(7, 0) #define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2)) #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) -#define CBA2R_VMID16 GENMASK(31, 16) -#define CBA2R_VA64 BIT(0) +#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16) +#define ARM_SMMU_CBA2R_VA64 BIT(0) #define ARM_SMMU_CB_SCTLR 0x0 -#define SCTLR_S1_ASIDPNE BIT(12) -#define SCTLR_CFCFG BIT(7) -#define SCTLR_CFIE BIT(6) -#define SCTLR_CFRE BIT(5) -#define SCTLR_E BIT(4) -#define SCTLR_AFE BIT(2) -#define SCTLR_TRE BIT(1) -#define SCTLR_M BIT(0) +#define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12) +#define ARM_SMMU_SCTLR_CFCFG BIT(7) +#define ARM_SMMU_SCTLR_CFIE BIT(6) +#define ARM_SMMU_SCTLR_CFRE BIT(5) +#define ARM_SMMU_SCTLR_E BIT(4) +#define ARM_SMMU_SCTLR_AFE BIT(2) +#define ARM_SMMU_SCTLR_TRE BIT(1) +#define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 #define ARM_SMMU_CB_RESUME 0x8 -#define RESUME_TERMINATE BIT(0) +#define ARM_SMMU_RESUME_TERMINATE BIT(0) #define ARM_SMMU_CB_TCR2 0x10 -#define TCR2_SEP GENMASK(17, 15) -#define TCR2_SEP_UPSTREAM 0x7 -#define TCR2_AS BIT(4) +#define ARM_SMMU_TCR2_SEP GENMASK(17, 15) +#define ARM_SMMU_TCR2_SEP_UPSTREAM 0x7 +#define ARM_SMMU_TCR2_AS BIT(4) +#define ARM_SMMU_TCR2_PASIZE GENMASK(3, 0) #define ARM_SMMU_CB_TTBR0 0x20 #define ARM_SMMU_CB_TTBR1 0x28 -#define TTBRn_ASID GENMASK_ULL(63, 48) +#define ARM_SMMU_TTBRn_ASID GENMASK_ULL(63, 48) #define ARM_SMMU_CB_TCR 0x30 +#define ARM_SMMU_TCR_EAE BIT(31) +#define ARM_SMMU_TCR_EPD1 BIT(23) +#define ARM_SMMU_TCR_TG0 GENMASK(15, 14) +#define ARM_SMMU_TCR_SH0 GENMASK(13, 12) +#define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) +#define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) +#define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) + +#define ARM_SMMU_VTCR_RES1 BIT(31) +#define ARM_SMMU_VTCR_PS GENMASK(18, 16) +#define ARM_SMMU_VTCR_TG0 ARM_SMMU_TCR_TG0 +#define ARM_SMMU_VTCR_SH0 ARM_SMMU_TCR_SH0 +#define ARM_SMMU_VTCR_ORGN0 ARM_SMMU_TCR_ORGN0 +#define ARM_SMMU_VTCR_IRGN0 ARM_SMMU_TCR_IRGN0 +#define ARM_SMMU_VTCR_SL0 GENMASK(7, 6) +#define ARM_SMMU_VTCR_T0SZ ARM_SMMU_TCR_T0SZ + #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c #define ARM_SMMU_CB_PAR 0x50 -#define CB_PAR_F BIT(0) +#define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define FSR_MULTI BIT(31) -#define FSR_SS BIT(30) -#define FSR_UUT BIT(8) -#define FSR_ASF BIT(7) -#define FSR_TLBLKF BIT(6) -#define FSR_TLBMCF BIT(5) -#define FSR_EF BIT(4) -#define FSR_PF BIT(3) -#define FSR_AFF BIT(2) -#define FSR_TF BIT(1) - -#define FSR_IGN (FSR_AFF | FSR_ASF | \ - FSR_TLBMCF | FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF | FSR_IGN) +#define ARM_SMMU_FSR_MULTI BIT(31) +#define ARM_SMMU_FSR_SS BIT(30) +#define ARM_SMMU_FSR_UUT BIT(8) +#define ARM_SMMU_FSR_ASF BIT(7) +#define ARM_SMMU_FSR_TLBLKF BIT(6) +#define ARM_SMMU_FSR_TLBMCF BIT(5) +#define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_PF BIT(3) +#define ARM_SMMU_FSR_AFF BIT(2) +#define ARM_SMMU_FSR_TF BIT(1) + +#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ + ARM_SMMU_FSR_ASF | \ + ARM_SMMU_FSR_TLBMCF | \ + ARM_SMMU_FSR_TLBLKF) + +#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ + ARM_SMMU_FSR_SS | \ + ARM_SMMU_FSR_UUT | \ + ARM_SMMU_FSR_EF | \ + ARM_SMMU_FSR_PF | \ + ARM_SMMU_FSR_TF | \ + ARM_SMMU_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define FSYNR0_WNR BIT(4) +#define ARM_SMMU_FSYNR0_WNR BIT(4) #define ARM_SMMU_CB_S1_TLBIVA 0x600 #define ARM_SMMU_CB_S1_TLBIASID 0x610 @@ -203,7 +230,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ATSR_ACTIVE BIT(0) +#define ARM_SMMU_ATSR_ACTIVE BIT(0) /* Maximum number of context banks per SMMU */ @@ -297,7 +324,7 @@ struct arm_smmu_cfg { enum arm_smmu_cbar_type cbar; enum arm_smmu_context_fmt fmt; }; -#define INVALID_IRPTNDX 0xff +#define ARM_SMMU_INVALID_IRPTNDX 0xff enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, @@ -318,6 +345,33 @@ struct arm_smmu_domain { struct iommu_domain domain; }; +static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) +{ + return ARM_SMMU_TCR_EPD1 | + FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); +} + +static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) +{ + return FIELD_PREP(ARM_SMMU_TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) | + FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM); +} + +static inline u32 arm_smmu_lpae_vtcr(struct io_pgtable_cfg *cfg) +{ + return ARM_SMMU_VTCR_RES1 | + FIELD_PREP(ARM_SMMU_VTCR_PS, cfg->arm_lpae_s2_cfg.vtcr.ps) | + FIELD_PREP(ARM_SMMU_VTCR_TG0, cfg->arm_lpae_s2_cfg.vtcr.tg) | + FIELD_PREP(ARM_SMMU_VTCR_SH0, cfg->arm_lpae_s2_cfg.vtcr.sh) | + FIELD_PREP(ARM_SMMU_VTCR_ORGN0, cfg->arm_lpae_s2_cfg.vtcr.orgn) | + FIELD_PREP(ARM_SMMU_VTCR_IRGN0, cfg->arm_lpae_s2_cfg.vtcr.irgn) | + FIELD_PREP(ARM_SMMU_VTCR_SL0, cfg->arm_lpae_s2_cfg.vtcr.sl) | + FIELD_PREP(ARM_SMMU_VTCR_T0SZ, cfg->arm_lpae_s2_cfg.vtcr.tsz); +} /* Implementation details, yay! */ struct arm_smmu_impl { diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 7c3bd2c3cdca..4272fe4e17f4 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -149,8 +149,6 @@ #define ARM_V7S_TTBR_IRGN_ATTR(attr) \ ((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1)) -#define ARM_V7S_TCR_PD1 BIT(5) - #ifdef CONFIG_ZONE_DMA32 #define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 #define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 @@ -798,8 +796,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, */ cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M; - /* TCR: T0SZ=0, disable TTBR1 */ - cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1; + /* TCR: T0SZ=0, EAE=0 (if applicable) */ + cfg->arm_v7s_cfg.tcr = 0; /* * TEX remap: the indices used map to the closest equivalent types @@ -822,15 +820,13 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) | - ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | - (cfg->coherent_walk ? - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); - cfg->arm_v7s_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S | + (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS | + ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); return &data->iop; out_free_data: diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index bdf47f745268..983b08477e64 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -100,40 +100,29 @@ #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) /* Register bits */ -#define ARM_32_LPAE_TCR_EAE (1 << 31) -#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) +#define ARM_LPAE_TCR_TG0_4K 0 +#define ARM_LPAE_TCR_TG0_64K 1 +#define ARM_LPAE_TCR_TG0_16K 2 -#define ARM_LPAE_TCR_EPD1 (1 << 23) +#define ARM_LPAE_TCR_TG1_16K 1 +#define ARM_LPAE_TCR_TG1_4K 2 +#define ARM_LPAE_TCR_TG1_64K 3 -#define ARM_LPAE_TCR_TG0_4K (0 << 14) -#define ARM_LPAE_TCR_TG0_64K (1 << 14) -#define ARM_LPAE_TCR_TG0_16K (2 << 14) - -#define ARM_LPAE_TCR_SH0_SHIFT 12 -#define ARM_LPAE_TCR_SH0_MASK 0x3 #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 -#define ARM_LPAE_TCR_ORGN0_SHIFT 10 -#define ARM_LPAE_TCR_IRGN0_SHIFT 8 -#define ARM_LPAE_TCR_RGN_MASK 0x3 #define ARM_LPAE_TCR_RGN_NC 0 #define ARM_LPAE_TCR_RGN_WBWA 1 #define ARM_LPAE_TCR_RGN_WT 2 #define ARM_LPAE_TCR_RGN_WB 3 -#define ARM_LPAE_TCR_SL0_SHIFT 6 -#define ARM_LPAE_TCR_SL0_MASK 0x3 +#define ARM_LPAE_VTCR_SL0_MASK 0x3 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 -#define ARM_LPAE_TCR_SZ_MASK 0xf - -#define ARM_LPAE_TCR_PS_SHIFT 16 -#define ARM_LPAE_TCR_PS_MASK 0x7 -#define ARM_LPAE_TCR_IPS_SHIFT 32 -#define ARM_LPAE_TCR_IPS_MASK 0x7 +#define ARM_LPAE_VTCR_PS_SHIFT 16 +#define ARM_LPAE_VTCR_PS_MASK 0x7 #define ARM_LPAE_TCR_PS_32_BIT 0x0ULL #define ARM_LPAE_TCR_PS_36_BIT 0x1ULL @@ -293,17 +282,11 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = prot; - if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; - if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else pte |= ARM_LPAE_PTE_TYPE_BLOCK; - if (data->iop.fmt != ARM_MALI_LPAE) - pte |= ARM_LPAE_PTE_AF; - pte |= ARM_LPAE_PTE_SH_IS; pte |= paddr_to_iopte(paddr, data); __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); @@ -460,9 +443,20 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, << ARM_LPAE_PTE_ATTRINDX_SHIFT); } + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_SH_IS; + else + pte |= ARM_LPAE_PTE_SH_OS; + if (prot & IOMMU_NOEXEC) pte |= ARM_LPAE_PTE_XN; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_AF; + return pte; } @@ -474,6 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, arm_lpae_iopte *ptep = data->pgd; int ret, lvl = data->start_level; arm_lpae_iopte prot; + long iaext = (long)iova >> cfg->ias; /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) @@ -482,7 +477,9 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return -EINVAL; - if (WARN_ON(iova >> data->iop.cfg.ias || paddr >> data->iop.cfg.oas)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext || paddr >> cfg->oas)) return -ERANGE; prot = arm_lpae_prot_to_pte(data, iommu_prot); @@ -648,11 +645,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; + long iaext = (long)iova >> cfg->ias; if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return 0; - if (WARN_ON(iova >> data->iop.cfg.ias)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) return 0; return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep); @@ -787,9 +787,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { u64 reg; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; + bool tg1; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | - IO_PGTABLE_QUIRK_NON_STRICT)) + IO_PGTABLE_QUIRK_NON_STRICT | + IO_PGTABLE_QUIRK_ARM_TTBR1)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -798,58 +801,55 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* TCR */ if (cfg->coherent_walk) { - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_IS; + tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; } else { - reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_OS; + tcr->irgn = ARM_LPAE_TCR_RGN_NC; + tcr->orgn = ARM_LPAE_TCR_RGN_NC; } + tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K; break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - - /* Disable speculative walks through TTBR1 */ - reg |= ARM_LPAE_TCR_EPD1; - cfg->arm_lpae_s1_cfg.tcr = reg; + tcr->tsz = 64ULL - cfg->ias; /* MAIRs */ reg = (ARM_LPAE_MAIR_ATTR_NC @@ -872,9 +872,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); - cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd); return &data->iop; out_free_data: @@ -885,8 +884,9 @@ out_free_data: static struct io_pgtable * arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - u64 reg, sl; + u64 sl; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; /* The NS quirk doesn't apply at stage 2 */ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) @@ -911,55 +911,59 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) } /* VTCR */ - reg = ARM_64_LPAE_S2_TCR_RES1 | - (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + if (cfg->coherent_walk) { + vtcr->sh = ARM_LPAE_TCR_SH_IS; + vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA; + } else { + vtcr->sh = ARM_LPAE_TCR_SH_OS; + vtcr->irgn = ARM_LPAE_TCR_RGN_NC; + vtcr->orgn = ARM_LPAE_TCR_RGN_NC; + } sl = data->start_level; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + vtcr->tg = ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + vtcr->tg = ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + vtcr->tg = ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; - cfg->arm_lpae_s2_cfg.vtcr = reg; + vtcr->tsz = 64ULL - cfg->ias; + vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK; /* Allocate pgd pages */ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), @@ -982,35 +986,21 @@ out_free_data: static struct io_pgtable * arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 32 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; - cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; - } - - return iop; + return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); } static struct io_pgtable * arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 40 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); - if (iop) - cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; - - return iop; + return arm_64_lpae_alloc_pgtable_s2(cfg, cookie); } static struct io_pgtable * diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index ced53e5b72b5..94394c81468f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -63,7 +63,7 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) if (!ops) return; - iop = container_of(ops, struct io_pgtable, ops); + iop = io_pgtable_ops_to_pgtable(ops); io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d02edd2751f3..ecb3f9464dd5 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -374,7 +374,7 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain) u32 tmp; /* TTBR0 */ - ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; + ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr; ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 93f14bca26ee..94a6df1bddd6 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -279,8 +279,8 @@ static void __program_context(void __iomem *base, int ctx, SET_V2PCFG(base, ctx, 0x3); SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr); - SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]); - SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]); + SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr); + SET_TTBR1(base, ctx, 0); /* Set prrr and nmrr */ SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6fc1f5ecf91e..95945f467c03 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -367,7 +367,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { data->m4u_dom = dom; - writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); } @@ -765,7 +765,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); if (m4u_dom) - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); return 0; } diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e7bc8b721947..20738aacac89 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -203,8 +203,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, if (err) break; } - } + fwspec = dev_iommu_fwspec_get(dev); + if (!err && fwspec) + of_property_read_u32(master_np, "pasid-num-bits", + &fwspec->num_pasid_bits); + } /* * Two success conditions can be represented by non-negative err here: diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 52f38292df5b..39759db4f003 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -201,7 +201,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -215,7 +215,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) } iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); - iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE); + iommu_writel(ctx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); return IRQ_HANDLED; } @@ -269,18 +269,15 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | - FIELD_PREP(TTBRn_ASID, ctx->asid)); - iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | - FIELD_PREP(TTBRn_ASID, ctx->asid)); + pgtbl_cfg.arm_lpae_s1_cfg.ttbr | + FIELD_PREP(ARM_SMMU_TTBRn_ASID, ctx->asid)); + iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, 0); /* TCR */ iommu_writel(ctx, ARM_SMMU_CB_TCR2, - (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | - FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM)); + arm_smmu_lpae_tcr2(&pgtbl_cfg)); iommu_writel(ctx, ARM_SMMU_CB_TCR, - pgtbl_cfg.arm_lpae_s1_cfg.tcr); + arm_smmu_lpae_tcr(&pgtbl_cfg) | ARM_SMMU_TCR_EAE); /* MAIRs (stage-1 only) */ iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, @@ -289,11 +286,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32); /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | - SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | + ARM_SMMU_SCTLR_AFE | ARM_SMMU_SCTLR_TRE | + ARM_SMMU_SCTLR_M | ARM_SMMU_SCTLR_S1_ASIDPNE | + ARM_SMMU_SCTLR_CFCFG; if (IS_ENABLED(CONFIG_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ee21eedafe98..53d53c6c2be9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -83,12 +83,16 @@ struct io_pgtable_cfg { * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. + * + * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table + * for use in the upper half of a split address space. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) + #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; @@ -100,18 +104,33 @@ struct io_pgtable_cfg { /* Low-level data specific to the table format */ union { struct { - u64 ttbr[2]; - u64 tcr; + u64 ttbr; + struct { + u32 ips:3; + u32 tg:2; + u32 sh:2; + u32 orgn:2; + u32 irgn:2; + u32 tsz:6; + } tcr; u64 mair; } arm_lpae_s1_cfg; struct { u64 vttbr; - u64 vtcr; + struct { + u32 ps:3; + u32 tg:2; + u32 sh:2; + u32 orgn:2; + u32 irgn:2; + u32 sl:2; + u32 tsz:6; + } vtcr; } arm_lpae_s2_cfg; struct { - u32 ttbr[2]; + u32 ttbr; u32 tcr; u32 nmrr; u32 prrr; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e9f94d3f7a04..c1ad15228447 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -388,12 +388,19 @@ void iommu_device_sysfs_remove(struct iommu_device *iommu); int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) +static inline void __iommu_device_set_ops(struct iommu_device *iommu, + const struct iommu_ops *ops) { iommu->ops = ops; } +#define iommu_device_set_ops(iommu, ops) \ +do { \ + struct iommu_ops *__ops = (struct iommu_ops *)(ops); \ + __ops->owner = THIS_MODULE; \ + __iommu_device_set_ops(iommu, __ops); \ +} while (0) + static inline void iommu_device_set_fwnode(struct iommu_device *iommu, struct fwnode_handle *fwnode) { @@ -572,6 +579,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @iommu_priv: IOMMU driver private data for this device + * @num_pasid_bits: number of PASID bits supported by this device * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU */ @@ -580,6 +588,7 @@ struct iommu_fwspec { struct fwnode_handle *iommu_fwnode; void *iommu_priv; u32 flags; + u32 num_pasid_bits; unsigned int num_ids; u32 ids[1]; }; diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 5d62e78946a3..d08f0869f121 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -33,6 +33,9 @@ void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ +static inline int pci_enable_pasid(struct pci_dev *pdev, int features) +{ return -EINVAL; } +static inline void pci_disable_pasid(struct pci_dev *pdev) { } static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) |