diff options
| author | Bibo Mao <maobibo@loongson.cn> | 2026-04-09 13:56:36 +0300 |
|---|---|---|
| committer | Huacai Chen <chenhuacai@loongson.cn> | 2026-04-09 13:56:36 +0300 |
| commit | c43dce6f13fb12144571c168c7a593e5e546f3b5 (patch) | |
| tree | af24b8f8d1576f5b77116b75181bcdc4df68c264 | |
| parent | aac656857e9f008a014ac9d58aab66e8fc803604 (diff) | |
| download | linux-c43dce6f13fb12144571c168c7a593e5e546f3b5.tar.xz | |
LoongArch: KVM: Make vcpu_is_preempted() as a macro rather than function
vcpu_is_preempted() is performance sensitive and is called in the
function osq_lock(), so make it a macro. That way its parameter is not
evaluated most of the time, which avoids cache line thrashing across
NUMA nodes.
Here is part of the UnixBench results on a Loongson-3C5000 dual-way
machine with 32 cores and 2 NUMA nodes.
original inline macro
execl 7025.7 6991.2 7242.3
fstime 474.6 703.1 1071
From the test results, making vcpu_is_preempted() a macro is the best,
and there is some improvement compared with the original function
method.
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
| -rw-r--r-- | arch/loongarch/include/asm/qspinlock.h | 26 | ||||
| -rw-r--r-- | arch/loongarch/kernel/paravirt.c | 16 |
2 files changed, 24 insertions, 18 deletions
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h index 66244801db67..0ee15b3b3937 100644 --- a/arch/loongarch/include/asm/qspinlock.h +++ b/arch/loongarch/include/asm/qspinlock.h @@ -2,11 +2,13 @@ #ifndef _ASM_LOONGARCH_QSPINLOCK_H #define _ASM_LOONGARCH_QSPINLOCK_H +#include <asm/kvm_para.h> #include <linux/jump_label.h> #ifdef CONFIG_PARAVIRT - +DECLARE_STATIC_KEY_FALSE(virt_preempt_key); DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); +DECLARE_PER_CPU(struct kvm_steal_time, steal_time); #define virt_spin_lock virt_spin_lock @@ -34,9 +36,25 @@ __retry: return true; } -#define vcpu_is_preempted vcpu_is_preempted - -bool vcpu_is_preempted(int cpu); +/* + * Macro is better than inline function here + * With macro, parameter cpu is parsed only when it is used. + * With inline function, parameter cpu is parsed even though it is not used. + * This may cause cache line thrashing across NUMA nodes. + */ +#define vcpu_is_preempted(cpu) \ +({ \ + bool __val; \ + \ + if (!static_branch_unlikely(&virt_preempt_key)) \ + __val = false; \ + else { \ + struct kvm_steal_time *src; \ + src = &per_cpu(steal_time, cpu); \ + __val = !!(READ_ONCE(src->preempted) & KVM_VCPU_PREEMPTED); \ + } \ + __val; \ +}) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index b74fe6db49ab..10821cce554c 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -10,9 +10,9 @@ #include <asm/paravirt.h> static int has_steal_clock; -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); -static DEFINE_STATIC_KEY_FALSE(virt_preempt_key); +DEFINE_STATIC_KEY_FALSE(virt_preempt_key); DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); +DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); static bool steal_acc = true; @@ -260,18 +260,6 @@ static int pv_time_cpu_down_prepare(unsigned int cpu) return 0; } - -bool vcpu_is_preempted(int cpu) -{ - struct 
kvm_steal_time *src; - - if (!static_branch_unlikely(&virt_preempt_key)) - return false; - - src = &per_cpu(steal_time, cpu); - return !!(src->preempted & KVM_VCPU_PREEMPTED); -} -EXPORT_SYMBOL(vcpu_is_preempted); #endif static void pv_cpu_reboot(void *unused) |
