diff options
author | Anthony Yznaga <anthony.yznaga@oracle.com> | 2017-08-18 22:40:36 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-09-10 06:20:11 +0300 |
commit | a7159a87a3836f61a97882e671d2d66bbb96c62e (patch) | |
tree | 3f56de47be55b0367bb7588869f9c95ee2516409 | |
parent | 5bd0ea9107dca975dc4ba4d9de39b4938d2cb36d (diff) | |
download | linux-a7159a87a3836f61a97882e671d2d66bbb96c62e.tar.xz |
sparc64: speed up etrap/rtrap on NG2 and later processors
For many sun4v processor types, reading or writing a privileged register
has a latency of 40 to 70 cycles. Use a combination of the low-latency
allclean, otherw, normalw, and nop instructions in etrap and rtrap to
replace 2 rdpr and 5 wrpr instructions and improve etrap/rtrap
performance. allclean, otherw, and normalw are available on NG2 and
later processors.
The average ticks to execute the flush windows trap ("ta 0x3") with and
without this patch on select platforms:
CPU Not patched Patched % Latency Reduction
NG2 1762 1558 -11.58
NG4 3619 3204 -11.47
M7 3015 2624 -12.97
SPARC64-X 829 770 -7.12
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/include/asm/trap_block.h | 2 | ||||
-rw-r--r-- | arch/sparc/kernel/etrap_64.S | 26 | ||||
-rw-r--r-- | arch/sparc/kernel/rtrap_64.S | 13 | ||||
-rw-r--r-- | arch/sparc/kernel/setup_64.c | 5 | ||||
-rw-r--r-- | arch/sparc/kernel/vmlinux.lds.S | 5 |
5 files changed, 45 insertions, 6 deletions
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ff05992dae7a..dfc538609eb2 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -73,6 +73,8 @@ struct sun4v_1insn_patch_entry { }; extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, __sun4v_1insn_patch_end; +extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch, + __fast_win_ctrl_1insn_patch_end; struct sun4v_2insn_patch_entry { unsigned int addr; diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S index 1276ca2567ba..5c237467d156 100644 --- a/arch/sparc/kernel/etrap_64.S +++ b/arch/sparc/kernel/etrap_64.S @@ -38,7 +38,11 @@ etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1) or %g1, %g3, %g1 bne,pn %xcc, 1f sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2 - wrpr %g0, 7, %cleanwin +661: wrpr %g0, 7, %cleanwin + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + .word 0x85880000 ! allclean + .previous sethi %hi(TASK_REGOFF), %g2 sethi %hi(TSTATE_PEF), %g3 @@ -88,16 +92,30 @@ etrap_save: save %g2, -STACK_BIAS, %sp bne,pn %xcc, 3f mov PRIMARY_CONTEXT, %l4 - rdpr %canrestore, %g3 +661: rdpr %canrestore, %g3 + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + nop + .previous + rdpr %wstate, %g2 - wrpr %g0, 0, %canrestore +661: wrpr %g0, 0, %canrestore + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + nop + .previous sll %g2, 3, %g2 /* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */ mov 1, %l5 sth %l5, [%l6 + TI_SYS_NOERROR] - wrpr %g3, 0, %otherwin +661: wrpr %g3, 0, %otherwin + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + .word 0x87880000 ! otherw + .previous + wrpr %g2, 0, %wstate sethi %hi(sparc64_kern_pri_context), %g2 ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3 diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 709a82ebd294..dff86fad0a1f 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -224,10 +224,19 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 rdpr %otherwin, %l2 srl %l1, 3, %l1 - wrpr %l2, %g0, %canrestore +661: wrpr %l2, %g0, %canrestore + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + .word 0x89880000 ! normalw + .previous + wrpr %l1, %g0, %wstate brnz,pt %l2, user_rtt_restore - wrpr %g0, %g0, %otherwin +661: wrpr %g0, %g0, %otherwin + .section .fast_win_ctrl_1insn_patch, "ax" + .word 661b + nop + .previous ldx [%g6 + TI_FLAGS], %g3 wr %g0, ASI_AIUP, %asi diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c4088a3b1051..db4c4d7e28a0 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -300,6 +300,11 @@ static void __init sun4v_patch(void) break; } + if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) { + sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch, + &__fast_win_ctrl_1insn_patch_end); + } + sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 34d37e6c2d06..d78847d56a4b 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -159,6 +159,11 @@ SECTIONS *(.pud_huge_patch) __pud_huge_patch_end = .; } + .fast_win_ctrl_1insn_patch : { + __fast_win_ctrl_1insn_patch = .; + *(.fast_win_ctrl_1insn_patch) + __fast_win_ctrl_1insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) #ifdef CONFIG_JUMP_LABEL |