diff options
-rw-r--r-- | arch/powerpc/include/asm/paca.h | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/exceptions-64s.S | 76 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_64.c | 13 | ||||
-rw-r--r-- | arch/powerpc/xmon/xmon.c | 2 |
5 files changed, 39 insertions, 58 deletions
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1b7fd535393b..b62c31037cad 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -239,8 +239,7 @@ struct paca_struct { */ u64 exrfi[EX_SIZE] __aligned(0x80); void *rfi_flush_fallback_area; - u64 l1d_flush_congruence; - u64 l1d_flush_sets; + u64 l1d_flush_size; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index fa5c4125f42a..88b84ac76b53 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -239,8 +239,7 @@ int main(void) OFFSET(PACA_IN_NMI, paca_struct, in_nmi); OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); OFFSET(PACA_EXRFI, paca_struct, exrfi); - OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); - OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9e6882bdc526..243d072a225a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1461,39 +1461,37 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); rfid @@ -1503,39 +1501,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); hrfid diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d1fa0e91f526..c388cc3357fa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -875,19 +875,8 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - /* - * The fallback flush is currently coded for 8-way - * associativity. Different associativity is possible, but it - * will be treated as 8-way and may not evict the lines as - * effectively. - * - * 128 byte lines are mandatory. - */ - u64 c = l1d_size / 8; - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_congruence = c; - paca[cpu].l1d_flush_sets = c / 128; + paca[cpu].l1d_flush_size = l1d_size; } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 01d9a2dcff20..82e1a3ee6e0f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2377,8 +2377,6 @@ static void dump_one_paca(int cpu) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); DUMP(p, rfi_flush_fallback_area, "px"); - DUMP(p, l1d_flush_congruence, "llx"); - DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E |