summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBibo Mao <maobibo@loongson.cn>2026-04-09 13:56:36 +0300
committerHuacai Chen <chenhuacai@loongson.cn>2026-04-09 13:56:36 +0300
commitc43dce6f13fb12144571c168c7a593e5e546f3b5 (patch)
treeaf24b8f8d1576f5b77116b75181bcdc4df68c264
parentaac656857e9f008a014ac9d58aab66e8fc803604 (diff)
downloadlinux-c43dce6f13fb12144571c168c7a593e5e546f3b5.tar.xz
LoongArch: KVM: Make vcpu_is_preempted() as a macro rather than function
vcpu_is_preempted() is performance-sensitive because it is called from osq_lock(), so convert it into a macro. With a macro, the cpu parameter is only evaluated when it is actually used, which avoids cache line thrashing across NUMA nodes. Here is part of the UnixBench result on a Loongson-3C5000 dual-way machine with 32 cores and 2 NUMA nodes. original inline macro execl 7025.7 6991.2 7242.3 fstime 474.6 703.1 1071 From the test results, making vcpu_is_preempted() a macro performs best, and there is some improvement compared with the original function method. Signed-off-by: Bibo Mao <maobibo@loongson.cn> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
-rw-r--r--arch/loongarch/include/asm/qspinlock.h26
-rw-r--r--arch/loongarch/kernel/paravirt.c16
2 files changed, 24 insertions, 18 deletions
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
index 66244801db67..0ee15b3b3937 100644
--- a/arch/loongarch/include/asm/qspinlock.h
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -2,11 +2,13 @@
#ifndef _ASM_LOONGARCH_QSPINLOCK_H
#define _ASM_LOONGARCH_QSPINLOCK_H
+#include <asm/kvm_para.h>
#include <linux/jump_label.h>
#ifdef CONFIG_PARAVIRT
-
+DECLARE_STATIC_KEY_FALSE(virt_preempt_key);
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DECLARE_PER_CPU(struct kvm_steal_time, steal_time);
#define virt_spin_lock virt_spin_lock
@@ -34,9 +36,25 @@ __retry:
return true;
}
-#define vcpu_is_preempted vcpu_is_preempted
-
-bool vcpu_is_preempted(int cpu);
+/*
+ * Macro is better than inline function here
+ * With macro, parameter cpu is parsed only when it is used.
+ * With inline function, parameter cpu is parsed even though it is not used.
+ * This may cause cache line thrashing across NUMA nodes.
+ */
+#define vcpu_is_preempted(cpu) \
+({ \
+ bool __val; \
+ \
+ if (!static_branch_unlikely(&virt_preempt_key)) \
+ __val = false; \
+ else { \
+ struct kvm_steal_time *src; \
+ src = &per_cpu(steal_time, cpu); \
+ __val = !!(READ_ONCE(src->preempted) & KVM_VCPU_PREEMPTED); \
+ } \
+ __val; \
+})
#endif /* CONFIG_PARAVIRT */
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
index b74fe6db49ab..10821cce554c 100644
--- a/arch/loongarch/kernel/paravirt.c
+++ b/arch/loongarch/kernel/paravirt.c
@@ -10,9 +10,9 @@
#include <asm/paravirt.h>
static int has_steal_clock;
-static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
-static DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
+DEFINE_STATIC_KEY_FALSE(virt_preempt_key);
DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
+DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static bool steal_acc = true;
@@ -260,18 +260,6 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
return 0;
}
-
-bool vcpu_is_preempted(int cpu)
-{
- struct kvm_steal_time *src;
-
- if (!static_branch_unlikely(&virt_preempt_key))
- return false;
-
- src = &per_cpu(steal_time, cpu);
- return !!(src->preempted & KVM_VCPU_PREEMPTED);
-}
-EXPORT_SYMBOL(vcpu_is_preempted);
#endif
static void pv_cpu_reboot(void *unused)