From 383f2835eb9afb723af71850037b2f074ac9db60 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Fri, 9 Sep 2005 13:02:02 -0700 Subject: [PATCH] Prefetch kernel stacks to speed up context switch For architecture like ia64, the switch stack structure is fairly large (currently 528 bytes). For context switch intensive application, we found that significant amount of cache misses occurs in switch_to() function. The following patch adds a hook in the schedule() function to prefetch switch stack structure as soon as 'next' task is determined. This allows maximum overlap in prefetch cache lines for that structure. Signed-off-by: Ken Chen Cc: Ingo Molnar Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/entry.S | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/ia64/kernel/entry.S') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3c8821024509..915e12791836 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -470,6 +470,29 @@ ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) +GLOBAL_ENTRY(prefetch_stack) + add r14 = -IA64_SWITCH_STACK_SIZE, sp + add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 + ;; + ld8 r16 = [r15] // load next's stack pointer + lfetch.fault.excl [r14], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault [r16], 128 + br.ret.sptk.many rp +END(prefetch_switch_stack) + GLOBAL_ENTRY(execve) mov r15=__NR_execve // put syscall number in place break __BREAK_SYSCALL -- cgit v1.2.3