diff options
| author | Heiko Carstens <hca@linux.ibm.com> | 2026-05-26 08:56:58 +0300 |
|---|---|---|
| committer | Alexander Gordeev <agordeev@linux.ibm.com> | 2026-06-03 16:32:46 +0300 |
| commit | 18ec6c5d7ec6c73ccff616041d75b6ea30a9a36e (patch) | |
| tree | ed437ee23146e81d09c2f4ff72fb0542e09ae031 | |
| parent | b50403d4f3ea4dbdef758f21d4e11d40d9f61d7b (diff) | |
| download | linux-18ec6c5d7ec6c73ccff616041d75b6ea30a9a36e.tar.xz | |
s390/percpu: Use new percpu code section for arch_this_cpu_add()
Convert arch_this_cpu_add() to make use of the new percpu code section
infrastructure.
With this, the text size of the kernel image is reduced by ~76 KB
(defconfig). In addition, more than 5300 generated calls to
preempt_schedule_notrace() within the kernel image (modules not counted)
are removed.
For example, given:
DEFINE_PER_CPU(long, foo);
void bar(long a) { this_cpu_add(foo, a); }
the code generated with the old arch_this_cpu_add() looks like this:
00000000000000c0 <bar>:
c0: c0 04 00 00 00 00 jgnop c0 <bar>
c6: eb 01 03 a8 00 6a asi 936,1
cc: c4 18 00 00 00 00 lgrl %r1,cc <bar+0xc>
ce: R_390_GOTENT foo+0x2
d2: e3 10 03 b8 00 08 ag %r1,952
d8: eb 22 10 00 00 e8 laag %r2,%r2,0(%r1)
de: eb ff 03 a8 00 6e alsi 936,-1
e4: a7 a4 00 05 jhe ee <bar+0x2e>
e8: c0 f4 00 00 00 00 jg e8 <bar+0x28>
ea: R_390_PC32DBL __s390_indirect_jump_r14+0x2
ee: c0 f4 00 00 00 00 jg ee <bar+0x2e>
f0: R_390_PLT32DBL preempt_schedule_notrace+0x2
The code generated with the new arch_this_cpu_add() looks like this:
00000000000000c0 <bar>:
c0: c0 04 00 00 00 00 jgnop c0 <bar>
c6: c4 38 00 00 00 00 lgrl %r3,c6 <bar+0x6>
c8: R_390_GOTENT foo+0x2
cc: b9 04 00 43 lgr %r4,%r3
d0: eb 00 43 c0 00 52 mviy 960(%r0),4
d6: e3 40 03 b8 00 08 ag %r4,952
dc: eb 52 40 00 00 e8 laag %r5,%r2,0(%r4)
e2: eb 00 03 c0 00 52 mviy 960,0
e8: c0 f4 00 00 00 00 jg e8 <bar+0x28>
ea: R_390_PC32DBL __s390_indirect_jump_r14+0x2
Note that the conditional call to preempt_schedule_notrace() is removed.
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
| -rw-r--r-- | arch/s390/include/asm/percpu.h | 65 |
1 file changed, 43 insertions, 22 deletions
```diff
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 79d5a4460b18..9140d81b7efc 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -135,28 +135,49 @@

 #else /* MARCH_HAS_Z196_FEATURES */

-#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
-do { \
-	typedef typeof(pcp) pcp_op_T__; \
-	pcp_op_T__ val__ = (val); \
-	pcp_op_T__ old__, *ptr__; \
-	preempt_disable_notrace(); \
-	ptr__ = raw_cpu_ptr(&(pcp)); \
-	if (__builtin_constant_p(val__) && \
-	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
-		asm volatile( \
-			op2 " %[ptr__],%[val__]" \
-			: [ptr__] "+Q" (*ptr__) \
-			: [val__] "i" ((szcast)val__) \
-			: "cc"); \
-	} else { \
-		asm volatile( \
-			op1 " %[old__],%[val__],%[ptr__]" \
-			: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
-			: [val__] "d" (val__) \
-			: "cc"); \
-	} \
-	preempt_enable_notrace(); \
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast) \
+do { \
+	unsigned long lc_pcpr, lc_pcpo; \
+	typedef typeof(pcp) pcp_op_T__; \
+	pcp_op_T__ val__ = (val); \
+	pcp_op_T__ old__, *ptr__; \
+	\
+	lc_pcpr = offsetof(struct lowcore, percpu_register); \
+	lc_pcpo = offsetof(struct lowcore, percpu_offset); \
+	ptr__ = PERCPU_PTR(&(pcp)); \
+	if (__builtin_constant_p(val__) && \
+	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
+		asm volatile( \
+			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
+			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]") \
+			op2 " 0(%[ptr__]),%[val__]\n" \
+			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]") \
+			: [ptr__] "+&a" (ptr__), "+m" (*ptr__), \
+			  "=m" (((struct lowcore *)0)->percpu_register) \
+			: [val__] "i" ((szcast)val__), \
+			  [disppcpr] "i" (lc_pcpr), \
+			  [disppcpo] "i" (lc_pcpo), \
+			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS), \
+			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS), \
+			  "m" (((struct lowcore *)0)->percpu_offset) \
+			: "cc"); \
+	} else { \
+		asm volatile( \
+			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
+			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]") \
+			op1 " %[old__],%[val__],0(%[ptr__])\n" \
+			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]") \
+			: [old__] "=&d" (old__), \
+			  [ptr__] "+&a" (ptr__), "+m" (*ptr__), \
+			  "=m" (((struct lowcore *)0)->percpu_register) \
+			: [val__] "d" (val__), \
+			  [disppcpr] "i" (lc_pcpr), \
+			  [disppcpo] "i" (lc_pcpo), \
+			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS), \
+			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS), \
+			  "m" (((struct lowcore *)0)->percpu_offset) \
+			: "cc"); \
+	} \
 } while (0)

 #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
```
