summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2026-02-27 18:27:02 +0300
committerJoerg Roedel <joerg.roedel@amd.com>2026-03-17 15:15:27 +0300
commit199036ae01321651fe0e4488f9e19a28af4c5f1d (patch)
treead56d9599448484d09b5953f89fb21ca36852362
parent7cd0c655f02f08a5de851059ac8360e5d10fae62 (diff)
downloadlinux-199036ae01321651fe0e4488f9e19a28af4c5f1d.tar.xz
iommupt: Optimize the gather processing for DMA-FQ mode
In PT_FEAT_FLUSH_RANGE mode the gather was accumulated but never flushed, and then the accumulated range was discarded by the dma-iommu code in DMA-FQ mode. This is basically optimal. However, for PT_FEAT_FLUSH_RANGE_NO_GAPS the page table would push flushes that are redundant with the flush-all generated by the DMA-FQ mode. Disable all range accumulation in the gather, and iommu_pt triggered flushing, when iommu_iotlb_gather_queued() indicates it is in DMA-FQ mode.

Reported-by: Robin Murphy <robin.murphy@arm.com>
Closes: https://lore.kernel.org/r/794b6121-b66b-4819-b291-9761ed21cd83@arm.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Samiullah Khawaja <skhawaja@google.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
-rw-r--r--drivers/iommu/generic_pt/iommu_pt.h27
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 3e33fe64feab..9c08bb594e41 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -51,16 +51,27 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
iommu_pages_stop_incoherent_list(free_list,
iommu_table->iommu_device);
- if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
- iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
- iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
- /*
- * Note that the sync frees the gather's free list, so we must
- * not have any pages on that list that are covered by iova/len
- */
+ /*
+ * If running in DMA-FQ mode then the unmap will be followed by an IOTLB
+ * flush all so we need to optimize by never flushing the IOTLB here.
+ *
+ * For NO_GAPS the user gets to pick if flushing all or doing micro
+ * flushes is better for their work load by choosing DMA vs DMA-FQ
+ * operation. Drivers should also see shadow_on_flush.
+ */
+ if (!iommu_iotlb_gather_queued(iotlb_gather)) {
+ if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
+ iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
+ iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
+ /*
+ * Note that the sync frees the gather's free list, so
+ * we must not have any pages on that list that are
+ * covered by iova/len
+ */
+ }
+ iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
}
- iommu_iotlb_gather_add_range(iotlb_gather, iova, len);
iommu_pages_list_splice(free_list, &iotlb_gather->freelist);
}