summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/book3s64/radix_tlb.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.ibm.com>2019-09-24 06:52:53 +0300
committerMichael Ellerman <mpe@ellerman.id.au>2019-09-24 13:58:55 +0300
commit047e6575aec71d75b765c22111820c4776cd1c43 (patch)
tree8742d25f78cbcf3add79662e241546d2d7e8846c /arch/powerpc/mm/book3s64/radix_tlb.c
parent09ce98cacd51fcd0fa0af2f79d1e1d3192f4cbb0 (diff)
downloadlinux-047e6575aec71d75b765c22111820c4776cd1c43.tar.xz
powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
On POWER9, under some circumstances, a broadcast TLB invalidation will fail to invalidate the ERAT cache on some threads when there are parallel mtpidr/mtlpidr happening on other threads of the same core. This can cause stores to continue to go to a page after it's unmapped. The workaround is to force an ERAT flush using PID=0 or LPID=0 tlbie flush. This additional TLB flush will cause the ERAT cache invalidation. Since we are using PID=0 or LPID=0, we don't get filtered out by the TLB snoop filtering logic. We need to still follow this up with another tlbie to take care of store vs tlbie ordering issue explained in commit: a5d4b5891c2f ("powerpc/mm: Fixup tlbie vs store ordering issue on POWER9"). The presence of ERAT cache implies we can still get new stores and they may miss store queue marking flush. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20190924035254.24612-3-aneesh.kumar@linux.ibm.com
Diffstat (limited to 'arch/powerpc/mm/book3s64/radix_tlb.c')
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c80
1 files changed, 72 insertions, 8 deletions
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 69fdc004d83f..67af871190c6 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -196,21 +196,82 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
-static inline void fixup_tlbie(void)
+
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void fixup_tlbie_pid(unsigned long pid)
{
- unsigned long pid = 0;
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}
+
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
unsigned long va = ((1UL << 52) - 1);
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid(0, RIC_FLUSH_TLB);
+ }
+
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid(pid);
}
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_lpid(lpid);
}
- fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range(addr - page_size, pid, ap);
}
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
- fixup_tlbie();
+ fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
- fixup_tlbie_lpid(lpid);
+ fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
- fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -856,7 +920,7 @@ is_local:
if (gflush)
__tlbie_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
- fixup_tlbie();
+
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,