diff options
author | Heiko Carstens <heiko.carstens@de.ibm.com> | 2014-09-03 15:26:23 +0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2014-09-09 10:53:30 +0400 |
commit | 3d1e220d08c6a00ffa83d39030b8162f66665b2b (patch) | |
tree | 4529f0d568ef53d296476a640d26ae0128bcbacf /arch/s390/kernel/mcount64.S | |
parent | ea2f47699082b971769be8b8f38c08b49219f471 (diff) | |
download | linux-3d1e220d08c6a00ffa83d39030b8162f66665b2b.tar.xz |
s390/ftrace: optimize mcount code
Reduce the number of executed instructions within the mcount block if
function tracing is enabled. We achieve that by using a non-standard
C function call ABI. Since the called function is also written in
assembler this is not a problem.
This also allows replacing the unconditional store at the beginning
of the mcount block with a larl instruction, which doesn't touch
memory.
In theory we could also patch the first instruction of the mcount block
to enable and disable function tracing. However, this would break kprobes.
This could be fixed by implementing the "kprobes_on_ftrace" feature;
however, keeping the odd jprobes working does not seem to be possible without
a lot of code churn. Therefore keep the code simple and accept one
wasted 1-cycle "larl" instruction per function prologue.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/kernel/mcount64.S')
-rw-r--r-- | arch/s390/kernel/mcount64.S | 30 |
1 files changed, 14 insertions, 16 deletions
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
index 8cf976f83a10..07abe8d464d4 100644
--- a/arch/s390/kernel/mcount64.S
+++ b/arch/s390/kernel/mcount64.S
@@ -16,7 +16,6 @@ ENTRY(ftrace_stub)
 	br	%r14
 
 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8)
 #define STACK_PTREGS (STACK_FRAME_OVERHEAD)
 #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
 #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
@@ -31,40 +30,39 @@ ENTRY(ftrace_caller)
 	aghi	%r15,-STACK_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
-	stmg	%r0,%r13,STACK_PTREGS_GPRS(%r15)
-	stg	%r14,(STACK_PTREGS_PSW+8)(%r15)
+	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
+	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	aghik	%r2,%r14,-MCOUNT_INSN_SIZE
+	aghik	%r2,%r0,-MCOUNT_INSN_SIZE
 	lgrl	%r4,function_trace_op
-	lgrl	%r14,ftrace_trace_function
+	lgrl	%r1,ftrace_trace_function
 #else
-	lgr	%r2,%r14
+	lgr	%r2,%r0
 	aghi	%r2,-MCOUNT_INSN_SIZE
 	larl	%r4,function_trace_op
 	lg	%r4,0(%r4)
-	larl	%r14,ftrace_trace_function
-	lg	%r14,0(%r14)
+	larl	%r1,ftrace_trace_function
+	lg	%r1,0(%r1)
 #endif
-	lg	%r3,STACK_PARENT_IP(%r15)
+	lgr	%r3,%r14
 	la	%r5,STACK_PTREGS(%r15)
-	basr	%r14,%r14
+	basr	%r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller. The patched instruction is:
 #	j	.+4
 ENTRY(ftrace_graph_caller)
 	j	ftrace_graph_caller_end
-	lg	%r2,STACK_PARENT_IP(%r15)
+	lg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
 	lg	%r3,(STACK_PTREGS_PSW+8)(%r15)
 	brasl	%r14,prepare_ftrace_return
-	stg	%r2,STACK_PARENT_IP(%r15)
+	stg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
 ftrace_graph_caller_end:
 	.globl	ftrace_graph_caller_end
 #endif
-	lmg	%r0,%r13,STACK_PTREGS_GPRS(%r15)
-	lg	%r14,(STACK_PTREGS_PSW+8)(%r15)
-	aghi	%r15,STACK_FRAME_SIZE
-	br	%r14
+	lg	%r1,(STACK_PTREGS_PSW+8)(%r15)
+	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+	br	%r1
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 