author     Jason Gunthorpe <jgg@nvidia.com>      2026-01-25 00:00:21 +0300
committer  Joerg Roedel <joerg.roedel@amd.com>   2026-01-28 17:14:17 +0300
commit     5815d9303c67cef5f47cd01e73b671e6b9c40ef3 (patch)
tree       68aa7c3ac3e79212995e2f86333e594d28dd79dc
parent     63804fed149a6750ffd28610c5c1c98cce6bd377 (diff)
iommupt: Only cache flush memory changed by unmap
The cache flush was happening at every level across the whole range of
the iteration, even if no leaves or tables were cleared. Instead, flush
only the sub-range that was actually written.

Overflushing is not a correctness problem, but it does hurt the
performance of unmap. After this series the performance compared to the
original VT-d implementation with cache flushing turned on is:

map_pages
 pgsz      , avg new,old ns , min new,old ns , min % (+ve is better)
 2^12      ,  253, 266      ,  213, 227      ,  6.06
 2^21      ,  246, 244      ,  221, 219      ,  0.00
 2^30      ,  231, 240      ,  209, 217      ,  3.03
 256*2^12  , 2604,2668      , 2415,2540      ,  4.04
 256*2^21  , 2495,2824      , 2390,2734      , 12.12
 256*2^30  , 2542,2845      , 2380,2718      , 12.12

unmap_pages
 pgsz      , avg new,old ns , min new,old ns , min % (+ve is better)
 2^12      ,  259, 292      ,  222, 251      , 11.11
 2^21      ,  255, 259      ,  227, 236      ,  3.03
 2^30      ,  238, 254      ,  217, 230      ,  5.05
 256*2^12  , 2751,2620      , 2417,2437      ,  0.00
 256*2^21  , 2461,2526      , 2377,2423      ,  1.01
 256*2^30  , 2498,2543      , 2370,2404      ,  1.01

Fixes: efa03dab7ce4 ("iommupt: Flush the CPU cache after any writes to the page table")
Reported-by: Francois Dugast <francois.dugast@intel.com>
Closes: https://lore.kernel.org/all/20260121130233.257428-1-francois.dugast@intel.com/
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
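[Editor's note] The dirty-window pattern the patch applies is small enough to show in isolation: start with an empty-window sentinel, widen the window as entries are actually cleared, and flush only if something was written. Below is a minimal userspace sketch of that idea; the table layout and the helpers flush_range()/unmap_range() are hypothetical stand-ins for illustration, not the kernel's iommu_pt API:

#include <limits.h>
#include <stdio.h>

#define NUM_ENTRIES 16U

static unsigned long table[NUM_ENTRIES];

/* Hypothetical stand-in for flush_writes_range(): in the kernel this
 * would flush the CPU cache lines covering entries [start, end). */
static void flush_range(unsigned int start, unsigned int end)
{
	printf("flush entries [%u, %u)\n", start, end);
}

/*
 * Clear the present entries in [begin, end_index) and flush only the
 * sub-range that was actually written.  Both indexes start at
 * UINT_MAX; start == end means "nothing written yet".
 */
static void unmap_range(unsigned int begin, unsigned int end_index)
{
	unsigned int flush_start = UINT_MAX;
	unsigned int flush_end = UINT_MAX;
	unsigned int index;

	for (index = begin; index < end_index; index++) {
		if (!table[index])	/* nothing mapped here, no write */
			continue;
		table[index] = 0;	/* the actual page table write */
		/* Grow the dirty window to cover this entry. */
		if (index < flush_start)
			flush_start = index;
		flush_end = index + 1;
	}

	/* Equal sentinels mean no entry was touched: skip the flush. */
	if (flush_start != flush_end)
		flush_range(flush_start, flush_end);
}

int main(void)
{
	table[5] = table[6] = table[9] = 1;	/* pretend-mapped entries */
	unmap_range(0, NUM_ENTRIES);		/* flushes only [5, 10) */
	return 0;
}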
 drivers/iommu/generic_pt/iommu_pt.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 52ef028ed2db..d575f3ba9d34 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -931,6 +931,8 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
struct pt_table_p *table)
{
struct pt_state pts = pt_init(range, level, table);
+ unsigned int flush_start_index = UINT_MAX;
+ unsigned int flush_end_index = UINT_MAX;
struct pt_unmap_args *unmap = arg;
unsigned int num_oas = 0;
unsigned int start_index;
@@ -986,6 +988,9 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg,
iommu_pages_list_add(&unmap->free_list,
pts.table_lower);
pt_clear_entries(&pts, ilog2(1));
+ if (pts.index < flush_start_index)
+ flush_start_index = pts.index;
+ flush_end_index = pts.index + 1;
}
pts.index++;
} else {
@@ -999,7 +1004,10 @@ start_oa:
num_contig_lg2 = pt_entry_num_contig_lg2(&pts);
pt_clear_entries(&pts, num_contig_lg2);
num_oas += log2_to_int(num_contig_lg2);
+ if (pts.index < flush_start_index)
+ flush_start_index = pts.index;
pts.index += log2_to_int(num_contig_lg2);
+ flush_end_index = pts.index;
}
if (pts.index >= pts.end_index)
break;
@@ -1007,7 +1015,8 @@ start_oa:
} while (true);
unmap->unmapped += log2_mul(num_oas, pt_table_item_lg2sz(&pts));
- flush_writes_range(&pts, start_index, pts.index);
+ if (flush_start_index != flush_end_index)
+ flush_writes_range(&pts, flush_start_index, flush_end_index);
return ret;
}
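[Editor's note] The sentinel choice is what makes the final test cheap: flush_start_index only ever decreases when an entry is written, and flush_end_index is assigned on every clear (to one past the last written index), so flush_start_index == flush_end_index can only hold if the loop never wrote anything. That turns the comparison into a "did we write at all" check without a separate boolean, and the flush now covers exactly [flush_start_index, flush_end_index) rather than the whole [start_index, pts.index) span as before.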