From f196a86687974cfcc1e8cade99ffca4605141860 Mon Sep 17 00:00:00 2001 From: Fangyu Yu Date: Fri, 17 Apr 2026 22:07:45 +0800 Subject: iommu/riscv: Advertise Svpbmt support to generic page table The RISC-V IOMMU can optionally support Svpbmt page-based memory types in its page table format. When present, the generic page table code can use this capability to encode memory attributes (e.g. MMIO vs normal memory) in PTEs. Signed-off-by: Fangyu Yu Reviewed-by: Jason Gunthorpe Reviewed-by: Anup Patel Reviewed-by: Guo Ren Reviewed-by: Nutty Liu Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- include/linux/generic_pt/common.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h index fc5d0b5edadc..2683e5b38998 100644 --- a/include/linux/generic_pt/common.h +++ b/include/linux/generic_pt/common.h @@ -188,6 +188,10 @@ enum { * Support the 64k contiguous page size following the Svnapot extension. */ PT_FEAT_RISCV_SVNAPOT_64K = PT_FEAT_FMT_START, + /* + * Support Svpbmt extension: encode page-based memory type (PBMT) in PTEs. + */ + PT_FEAT_RISCV_SVPBMT, }; -- cgit v1.2.3 From 74c9d82c7e3fe9e5845103fe83d711d89b169ab1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 8 May 2026 11:53:00 -0300 Subject: iommu: Split the kdoc comment for struct iommu_iotlb_gather Use in-line member documentation and add some small clarifications to the members. This is preparation to add more members.
- Note that pgsize is only used by arm-smmuv3 - Note that freelist is only used by iommupt - Reword queued to emphasize the flush-all behavior Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Tested-by: Andrew Jones Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e587d4ac4d33..a9e89911c90f 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -345,12 +345,6 @@ struct iommu_pages_list { /** * struct iommu_iotlb_gather - Range information for a pending IOTLB flush * - * @start: IOVA representing the start of the range to be flushed - * @end: IOVA representing the end of the range to be flushed (inclusive) - * @pgsize: The interval at which to perform the flush - * @freelist: Removed pages to free after sync - * @queued: Indicates that the flush will be queued - * * This structure is intended to be updated by multiple calls to the * ->unmap() function in struct iommu_ops before eventually being passed * into ->iotlb_sync(). Drivers can add pages to @freelist to be freed after @@ -359,10 +353,24 @@ struct iommu_pages_list { * later instead of ->iotlb_sync(), so drivers may optimise accordingly. 
*/ struct iommu_iotlb_gather { + /** @start: IOVA representing the start of the range to be flushed */ unsigned long start; + /** + * @end: IOVA representing the end of the range to be + * flushed (inclusive) + */ unsigned long end; + /** + * @pgsize: The interval at which to perform the flush, only used + * by arm-smmu-v3 + */ size_t pgsize; + /** + * @freelist: Removed pages to free after sync, only used by + * iommupt + */ struct iommu_pages_list freelist; + /** @queued: True if the gather will be completed with a flush all */ bool queued; }; -- cgit v1.2.3 From 89792629d4b260104555f9719ac84c1afc9a2012 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 8 May 2026 11:53:02 -0300 Subject: iommupt: Add PT_FEAT_DETAILED_GATHER Generating the ARM SMMUv3 and RISC-V invalidation commands optimally requires some additional details from iommupt: - leaf_levels_bitmap is used to compute the ARM Range Invalidation Table Top Level hint - leaf_levels_bitmap is also used to compute the stride when generating single invalidations to invalidate once per leaf - table_levels_bitmap also computes the ARM TTL for future cases when there are no leaves Put these under a feature since only two drivers need to calculate them. This is also useful for the coming kunit iotlb invalidation test to know more about what invalidation is happening. 
Signed-off-by: Jason Gunthorpe Reviewed-by: Pranjal Shrivastava Tested-by: Andrew Jones Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 23 +++++++++++++++++++++++ include/linux/generic_pt/common.h | 5 +++++ include/linux/iommu.h | 30 +++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 5ec135cf43e2..61c6d79c712c 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -43,6 +43,8 @@ static void flush_writes_item(const struct pt_state *pts) struct iommupt_pending_gather { struct iommu_iotlb_gather *iotlb_gather; struct iommu_pages_list free_list; + u8 leaf_levels_bitmap; + u8 table_levels_bitmap; }; static void gather_add_table(struct iommupt_pending_gather *pending, @@ -50,6 +52,17 @@ static void gather_add_table(struct iommupt_pending_gather *pending, struct pt_table_p *table) { iommu_pages_list_add(&pending->free_list, table); + if (pts_feature(pts, PT_FEAT_DETAILED_GATHER)) + pending->table_levels_bitmap |= BIT(pts->level); +} + +static void gather_add_leaf(struct iommupt_pending_gather *pending, + const struct pt_state *pts) +{ + if (!pts_feature(pts, PT_FEAT_DETAILED_GATHER)) + return; + + pending->leaf_levels_bitmap |= BIT(pts->level); } static void gather_range_pending(struct iommupt_pending_gather *pending, @@ -86,6 +99,15 @@ static void gather_range_pending(struct iommupt_pending_gather *pending, iommu_pages_list_splice(&pending->free_list, &iotlb_gather->freelist); INIT_LIST_HEAD(&pending->free_list.pages); + + if (pt_feature(common, PT_FEAT_DETAILED_GATHER)) { + iotlb_gather->pt.leaf_levels_bitmap |= + pending->leaf_levels_bitmap; + iotlb_gather->pt.table_levels_bitmap |= + pending->table_levels_bitmap; + pending->leaf_levels_bitmap = 0; + pending->table_levels_bitmap = 0; + } } #define DOMAIN_NS(op) CONCATENATE(CONCATENATE(pt_iommu_, PTPFX), op) @@ 
-1059,6 +1081,7 @@ start_oa: */ num_contig_lg2 = pt_entry_num_contig_lg2(&pts); pt_clear_entries(&pts, num_contig_lg2); + gather_add_leaf(&unmap->pending, &pts); num_oas += log2_to_int(num_contig_lg2); if (pts.index < flush_start_index) flush_start_index = pts.index; diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h index 2683e5b38998..07ef1c8341a4 100644 --- a/include/linux/generic_pt/common.h +++ b/include/linux/generic_pt/common.h @@ -134,6 +134,11 @@ enum pt_features { * significant amount of page table. */ PT_FEAT_FLUSH_RANGE_NO_GAPS, + /** + * @PT_FEAT_DETAILED_GATHER: Fill in the struct iommu_iotlb_gather pt + * sub structure with information about which levels were changed. + */ + PT_FEAT_DETAILED_GATHER, /* private: */ PT_FEAT_FMT_START, }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a9e89911c90f..bf8a77a164e4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -360,11 +360,31 @@ struct iommu_iotlb_gather { * flushed (inclusive) */ unsigned long end; - /** - * @pgsize: The interval at which to perform the flush, only used - * by arm-smmu-v3 - */ - size_t pgsize; + + union { + /** + * @pgsize: The interval at which to perform the flush, only + * used by arm-smmu-v3 + */ + size_t pgsize; + struct { + /** + * @pt.leaf_levels_bitmap: Bitmap of generic_pt + * levels where leaf entries were unmapped. Bit 0 + * means the leaf only level. If 0 no leafs + * were unmapped. + */ + u8 leaf_levels_bitmap; + /** + * @pt.table_levels_bitmap: Bitmap of generic_pt levels + * of table entries that were removed. Bit 0 is never + * set, bit 1 means a table of all leafs was removed. + * When freelist is empty this must be 0. 
+ */ + u8 table_levels_bitmap; + } pt; + }; + /** * @freelist: Removed pages to free after sync, only used by * iommupt -- cgit v1.2.3 From 91561e1dc94b8a33857370ef3c5b5523c4461d5b Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 21 May 2026 13:34:20 -0700 Subject: PCI: Add pci_ats_required() for CXL.cache capable devices Controlled by IOMMU drivers, ATS can be enabled "on demand", when a given PASID on a device is attached to an I/O page table. This is working, even when a device has no translation on its RID (i.e., RID is IOMMU bypassed). However, certain PCIe devices require non-PASID ATS on their RID even when the RID is IOMMU bypassed. Call this "ATS always on" in IOMMU terms. For example, CXL spec r4.0 notes in sec 3.2.5.13 Memory Type on CXL.cache: "To source requests on CXL.cache, devices need to get the Host Physical Address (HPA) from the Host by means of an ATS request on CXL.io." In other words, the CXL.cache capability requires ATS; otherwise, it can't access host physical memory. Introduce a new pci_ats_required() helper for the IOMMU driver to scan a PCI device and shift ATS policies between "on demand" and "always on". Add the support for CXL.cache devices first. Pre-CXL devices will be added in the quirks.c file. Note that pci_ats_required() validates against pci_ats_supported(), so we ensure that untrusted devices (e.g. external ports) will not be always on. This maintains the existing ATS security policy regarding potential side-channel attacks via ATS.
Cc: linux-cxl@vger.kernel.org Suggested-by: Vikram Sethi Suggested-by: Jason Gunthorpe Reviewed-by: Jonathan Cameron Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Nirmoy Das Acked-by: Nirmoy Das Reviewed-by: Dave Jiang Acked-by: Bjorn Helgaas Signed-off-by: Nicolin Chen Reviewed-by: Yi Liu Signed-off-by: Joerg Roedel --- drivers/pci/ats.c | 46 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 3 +++ include/uapi/linux/pci_regs.h | 1 + 3 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index ec6c8dbdc5e9..84cd06d74fc9 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -205,6 +205,52 @@ int pci_ats_page_aligned(struct pci_dev *pdev) return 0; } +/* + * CXL r4.0, sec 3.2.5.13 Memory Type on CXL.cache notes: to source requests on + * CXL.cache, devices need to get the Host Physical Address (HPA) from the Host + * by means of an ATS request on CXL.io. + * + * In other words, CXL.cache devices cannot access host physical memory without + * ATS. + * + * Check Cache_Capable instead of Cache_Enable because CXL.cache may be enabled + * after the caller uses this to make its ATS decision. + */ +static bool pci_cxl_ats_required(struct pci_dev *pdev) +{ + int offset; + u16 cap; + + offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!offset) + return false; + + if (pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap)) + return false; + + return cap & PCI_DVSEC_CXL_CACHE_CAPABLE; +} + +/** + * pci_ats_required - Whether the PCI device requires ATS + * @pdev: the PCI device + * + * Returns true, if the PCI device requires ATS for basic functional operation. 
+ */ +bool pci_ats_required(struct pci_dev *pdev) +{ + if (!pci_ats_supported(pdev)) + return false; + + /* A VF inherits its PF's requirement for ATS function */ + if (pdev->is_virtfn) + pdev = pci_physfn(pdev); + + return pci_cxl_ats_required(pdev); +} +EXPORT_SYMBOL_GPL(pci_ats_required); + #ifdef CONFIG_PCI_PRI void pci_pri_init(struct pci_dev *pdev) { diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 75c6c86cf09d..f3723b686129 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -12,6 +12,7 @@ int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); +bool pci_ats_required(struct pci_dev *dev); #else /* CONFIG_PCI_ATS */ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } @@ -24,6 +25,8 @@ static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } +static inline bool pci_ats_required(struct pci_dev *dev) +{ return false; } #endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 14f634ab9350..6ac45be1008b 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1349,6 +1349,7 @@ /* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */ #define PCI_DVSEC_CXL_DEVICE 0 #define PCI_DVSEC_CXL_CAP 0xA +#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0) #define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4) #define PCI_DVSEC_CXL_CTRL 0xC -- cgit v1.2.3