From 8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Sep 2005 15:16:27 -0700 Subject: [PATCH] detect soft lockups This patch adds a new kernel debug feature: CONFIG_DETECT_SOFTLOCKUP. When enabled, per-CPU watchdog threads are started; they try to run once per second. If they get delayed by more than 10 seconds, a callback from the timer interrupt detects this condition and prints out a warning message and a stack dump (once per lockup incident). The feature is otherwise non-intrusive: it doesn't try to unlock the box in any way, it only gets the debug info out, automatically, on all CPUs affected by the lockup. Signed-off-by: Ingo Molnar Signed-off-by: Nishanth Aravamudan Signed-Off-By: Matthias Urlichs Signed-off-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index dec5827c7742..5fb31bede103 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,23 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); +#ifdef CONFIG_DETECT_SOFTLOCKUP +extern void softlockup_tick(struct pt_regs *regs); +extern void spawn_softlockup_task(void); +extern void touch_softlockup_watchdog(void); +#else +static inline void softlockup_tick(struct pt_regs *regs) +{ +} +static inline void spawn_softlockup_task(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +#endif + + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) /* Is this address in the __sched functions? */ -- cgit v1.2.3 From 36d57ac4a818cb4aa3edbdf63ad2ebc31106f925 Mon Sep 17 00:00:00 2001 From: "H. J. Lu" Date: Tue, 6 Sep 2005 15:16:49 -0700 Subject: [PATCH] auxiliary vector cleanups The size of the auxiliary vector is fixed at 42 in linux/sched.h, but that isn't very obvious when looking at linux/elf.h. This patch adds AT_VECTOR_SIZE so that we can change it if necessary when a new vector is added. Because of include file ordering problems, doing this necessitated the extraction of the AT_* symbols into a standalone header file.
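For context, the auxiliary vector these AT_* values describe is a table of (type, value) pairs that the kernel builds on a process's initial stack, just past the environment strings; glibc and the dynamic linker read it to learn the page size, clock tick rate, vDSO location, and so on. The following minimal user-space sketch is not part of this patch; it assumes a 64-bit system and that environ still points at the original stack (true at program start), and simply walks the vector and prints a few entries:

#include <stdio.h>
#include <elf.h>

extern char **environ;

int main(void)
{
	char **p = environ;
	Elf64_auxv_t *auxv;

	/* The auxiliary vector starts right after the NULL that
	 * terminates the environment array. */
	while (*p)
		p++;
	p++;

	for (auxv = (Elf64_auxv_t *)p; auxv->a_type != AT_NULL; auxv++) {
		if (auxv->a_type == AT_PAGESZ)
			printf("AT_PAGESZ:   %lu\n", (unsigned long)auxv->a_un.a_val);
		if (auxv->a_type == AT_CLKTCK)
			printf("AT_CLKTCK:   %lu\n", (unsigned long)auxv->a_un.a_val);
		if (auxv->a_type == AT_PLATFORM)
			printf("AT_PLATFORM: %s\n", (char *)auxv->a_un.a_val);
	}
	return 0;
}

AT_VECTOR_SIZE bounds how many such (type, value) slots the kernel saves per mm in saved_auxv for /proc/PID/auxv, which is why the magic 42 below becomes a named constant.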
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/auxvec.h | 24 ++++++++++++++++++++++++ include/asm-alpha/elf.h | 22 ++-------------------- include/asm-arm/auxvec.h | 4 ++++ include/asm-arm26/auxvec.h | 4 ++++ include/asm-cris/auxvec.h | 4 ++++ include/asm-frv/auxvec.h | 4 ++++ include/asm-h8300/auxvec.h | 4 ++++ include/asm-i386/auxvec.h | 11 +++++++++++ include/asm-i386/elf.h | 8 +------- include/asm-ia64/auxvec.h | 11 +++++++++++ include/asm-ia64/elf.h | 8 +------- include/asm-m32r/auxvec.h | 4 ++++ include/asm-m68k/auxvec.h | 4 ++++ include/asm-m68knommu/auxvec.h | 4 ++++ include/asm-mips/auxvec.h | 4 ++++ include/asm-parisc/auxvec.h | 4 ++++ include/asm-ppc/auxvec.h | 14 ++++++++++++++ include/asm-ppc/elf.h | 11 +---------- include/asm-ppc64/auxvec.h | 19 +++++++++++++++++++ include/asm-ppc64/elf.h | 16 +--------------- include/asm-s390/auxvec.h | 4 ++++ include/asm-sh/auxvec.h | 4 ++++ include/asm-sh64/auxvec.h | 4 ++++ include/asm-sparc/auxvec.h | 4 ++++ include/asm-sparc64/auxvec.h | 4 ++++ include/asm-um/auxvec.h | 4 ++++ include/asm-v850/auxvec.h | 4 ++++ include/asm-x86_64/auxvec.h | 4 ++++ include/asm-xtensa/auxvec.h | 4 ++++ include/linux/auxvec.h | 31 +++++++++++++++++++++++++++++++ include/linux/elf.h | 24 +----------------------- include/linux/sched.h | 4 +++- 32 files changed, 196 insertions(+), 83 deletions(-) create mode 100644 include/asm-alpha/auxvec.h create mode 100644 include/asm-arm/auxvec.h create mode 100644 include/asm-arm26/auxvec.h create mode 100644 include/asm-cris/auxvec.h create mode 100644 include/asm-frv/auxvec.h create mode 100644 include/asm-h8300/auxvec.h create mode 100644 include/asm-i386/auxvec.h create mode 100644 include/asm-ia64/auxvec.h create mode 100644 include/asm-m32r/auxvec.h create mode 100644 include/asm-m68k/auxvec.h create mode 100644 include/asm-m68knommu/auxvec.h create mode 100644 include/asm-mips/auxvec.h create mode 100644 include/asm-parisc/auxvec.h create mode 100644 include/asm-ppc/auxvec.h create mode 100644 include/asm-ppc64/auxvec.h create mode 100644 include/asm-s390/auxvec.h create mode 100644 include/asm-sh/auxvec.h create mode 100644 include/asm-sh64/auxvec.h create mode 100644 include/asm-sparc/auxvec.h create mode 100644 include/asm-sparc64/auxvec.h create mode 100644 include/asm-um/auxvec.h create mode 100644 include/asm-v850/auxvec.h create mode 100644 include/asm-x86_64/auxvec.h create mode 100644 include/asm-xtensa/auxvec.h create mode 100644 include/linux/auxvec.h (limited to 'include/linux/sched.h') diff --git a/include/asm-alpha/auxvec.h b/include/asm-alpha/auxvec.h new file mode 100644 index 000000000000..e96fe880e310 --- /dev/null +++ b/include/asm-alpha/auxvec.h @@ -0,0 +1,24 @@ +#ifndef __ASM_ALPHA_AUXVEC_H +#define __ASM_ALPHA_AUXVEC_H + +/* Reserve these numbers for any future use of a VDSO. */ +#if 0 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#endif + +/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the + value is -1, then the cache doesn't exist. Otherwise: + + bit 0-3: Cache set-associativity; 0 means fully associative. + bit 4-7: Log2 of cacheline size. + bit 8-31: Size of the entire cache >> 8. + bit 32-63: Reserved. 
+*/ + +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +#endif /* __ASM_ALPHA_AUXVEC_H */ diff --git a/include/asm-alpha/elf.h b/include/asm-alpha/elf.h index e94a945a2314..6c2d78fba264 100644 --- a/include/asm-alpha/elf.h +++ b/include/asm-alpha/elf.h @@ -1,6 +1,8 @@ #ifndef __ASM_ALPHA_ELF_H #define __ASM_ALPHA_ELF_H +#include + /* Special values for the st_other field in the symbol table. */ #define STO_ALPHA_NOPV 0x80 @@ -142,26 +144,6 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -/* Reserve these numbers for any future use of a VDSO. */ -#if 0 -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 -#endif - -/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the - value is -1, then the cache doesn't exist. Otherwise: - - bit 0-3: Cache set-associativity; 0 means fully associative. - bit 4-7: Log2 of cacheline size. - bit 8-31: Size of the entire cache >> 8. - bit 32-63: Reserved. -*/ - -#define AT_L1I_CACHESHAPE 34 -#define AT_L1D_CACHESHAPE 35 -#define AT_L2_CACHESHAPE 36 -#define AT_L3_CACHESHAPE 37 - #ifdef __KERNEL__ #define SET_PERSONALITY(EX, IBCS2) \ diff --git a/include/asm-arm/auxvec.h b/include/asm-arm/auxvec.h new file mode 100644 index 000000000000..c0536f6b29a7 --- /dev/null +++ b/include/asm-arm/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMARM_AUXVEC_H +#define __ASMARM_AUXVEC_H + +#endif diff --git a/include/asm-arm26/auxvec.h b/include/asm-arm26/auxvec.h new file mode 100644 index 000000000000..c0536f6b29a7 --- /dev/null +++ b/include/asm-arm26/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMARM_AUXVEC_H +#define __ASMARM_AUXVEC_H + +#endif diff --git a/include/asm-cris/auxvec.h b/include/asm-cris/auxvec.h new file mode 100644 index 000000000000..cb30b01bf19f --- /dev/null +++ b/include/asm-cris/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMCRIS_AUXVEC_H +#define __ASMCRIS_AUXVEC_H + +#endif diff --git a/include/asm-frv/auxvec.h b/include/asm-frv/auxvec.h new file mode 100644 index 000000000000..07710778fa10 --- /dev/null +++ b/include/asm-frv/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __FRV_AUXVEC_H +#define __FRV_AUXVEC_H + +#endif diff --git a/include/asm-h8300/auxvec.h b/include/asm-h8300/auxvec.h new file mode 100644 index 000000000000..1d36fe38b088 --- /dev/null +++ b/include/asm-h8300/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMH8300_AUXVEC_H +#define __ASMH8300_AUXVEC_H + +#endif diff --git a/include/asm-i386/auxvec.h b/include/asm-i386/auxvec.h new file mode 100644 index 000000000000..395e13016bfb --- /dev/null +++ b/include/asm-i386/auxvec.h @@ -0,0 +1,11 @@ +#ifndef __ASMi386_AUXVEC_H +#define __ASMi386_AUXVEC_H + +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 130bdc8c68cf..fa11117d3cfa 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -9,6 +9,7 @@ #include #include #include /* for savesegment */ +#include #include @@ -109,13 +110,6 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #define ELF_PLATFORM (system_utsname.machine) -/* - * Architecture-neutral AT_ values in 0-17, leave some room - * for more of them, start the x86-specific ones at 32. 
- */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) do { } while (0) diff --git a/include/asm-ia64/auxvec.h b/include/asm-ia64/auxvec.h new file mode 100644 index 000000000000..23cebe5685b9 --- /dev/null +++ b/include/asm-ia64/auxvec.h @@ -0,0 +1,11 @@ +#ifndef _ASM_IA64_AUXVEC_H +#define _ASM_IA64_AUXVEC_H + +/* + * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of + * them, start the architecture-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#endif /* _ASM_IA64_AUXVEC_H */ diff --git a/include/asm-ia64/elf.h b/include/asm-ia64/elf.h index 7d4ccc4b976e..446fce036fd9 100644 --- a/include/asm-ia64/elf.h +++ b/include/asm-ia64/elf.h @@ -12,6 +12,7 @@ #include #include +#include /* * This is used to ensure we don't load something for the wrong architecture. @@ -177,13 +178,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with... */ #define ELF_PLATFORM NULL -/* - * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of - * them, start the architecture-specific ones at 32. - */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) #define elf_read_implies_exec(ex, executable_stack) \ diff --git a/include/asm-m32r/auxvec.h b/include/asm-m32r/auxvec.h new file mode 100644 index 000000000000..f76dcc860fae --- /dev/null +++ b/include/asm-m32r/auxvec.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M32R__AUXVEC_H +#define _ASM_M32R__AUXVEC_H + +#endif /* _ASM_M32R__AUXVEC_H */ diff --git a/include/asm-m68k/auxvec.h b/include/asm-m68k/auxvec.h new file mode 100644 index 000000000000..844d6d52204b --- /dev/null +++ b/include/asm-m68k/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMm68k_AUXVEC_H +#define __ASMm68k_AUXVEC_H + +#endif diff --git a/include/asm-m68knommu/auxvec.h b/include/asm-m68knommu/auxvec.h new file mode 100644 index 000000000000..844d6d52204b --- /dev/null +++ b/include/asm-m68knommu/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMm68k_AUXVEC_H +#define __ASMm68k_AUXVEC_H + +#endif diff --git a/include/asm-mips/auxvec.h b/include/asm-mips/auxvec.h new file mode 100644 index 000000000000..7cf7f2d21943 --- /dev/null +++ b/include/asm-mips/auxvec.h @@ -0,0 +1,4 @@ +#ifndef _ASM_AUXVEC_H +#define _ASM_AUXVEC_H + +#endif /* _ASM_AUXVEC_H */ diff --git a/include/asm-parisc/auxvec.h b/include/asm-parisc/auxvec.h new file mode 100644 index 000000000000..9c3ac4b89dc9 --- /dev/null +++ b/include/asm-parisc/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMPARISC_AUXVEC_H +#define __ASMPARISC_AUXVEC_H + +#endif diff --git a/include/asm-ppc/auxvec.h b/include/asm-ppc/auxvec.h new file mode 100644 index 000000000000..172358df29c8 --- /dev/null +++ b/include/asm-ppc/auxvec.h @@ -0,0 +1,14 @@ +#ifndef __PPC_AUXVEC_H +#define __PPC_AUXVEC_H + +/* + * We need to put in some extra aux table entries to tell glibc what + * the cache block size is, so it can use the dcbz instruction safely. + */ +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +/* A special ignored type value for PPC, for glibc compatibility. 
*/ +#define AT_IGNOREPPC 22 + +#endif diff --git a/include/asm-ppc/elf.h b/include/asm-ppc/elf.h index 2c056966efd3..c25cc35e6ab5 100644 --- a/include/asm-ppc/elf.h +++ b/include/asm-ppc/elf.h @@ -7,6 +7,7 @@ #include #include #include +#include /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 @@ -122,16 +123,6 @@ extern int dump_task_fpu(struct task_struct *t, elf_fpregset_t *fpu); #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 - extern int dcache_bsize; extern int icache_bsize; extern int ucache_bsize; diff --git a/include/asm-ppc64/auxvec.h b/include/asm-ppc64/auxvec.h new file mode 100644 index 000000000000..ac6381a106e1 --- /dev/null +++ b/include/asm-ppc64/auxvec.h @@ -0,0 +1,19 @@ +#ifndef __PPC64_AUXVEC_H +#define __PPC64_AUXVEC_H + +/* + * We need to put in some extra aux table entries to tell glibc what + * the cache block size is, so it can use the dcbz instruction safely. + */ +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +/* A special ignored type value for PPC, for glibc compatibility. */ +#define AT_IGNOREPPC 22 + +/* The vDSO location. We have to use the same value as x86 for glibc's + * sake :-) + */ +#define AT_SYSINFO_EHDR 33 + +#endif /* __PPC64_AUXVEC_H */ diff --git a/include/asm-ppc64/elf.h b/include/asm-ppc64/elf.h index 085eedb956fe..c919a89343db 100644 --- a/include/asm-ppc64/elf.h +++ b/include/asm-ppc64/elf.h @@ -4,6 +4,7 @@ #include #include #include +#include /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 @@ -237,21 +238,6 @@ do { \ #endif -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 - -/* The vDSO location. 
We have to use the same value as x86 for glibc's - * sake :-) - */ -#define AT_SYSINFO_EHDR 33 - extern int dcache_bsize; extern int icache_bsize; extern int ucache_bsize; diff --git a/include/asm-s390/auxvec.h b/include/asm-s390/auxvec.h new file mode 100644 index 000000000000..0d340720fd99 --- /dev/null +++ b/include/asm-s390/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMS390_AUXVEC_H +#define __ASMS390_AUXVEC_H + +#endif diff --git a/include/asm-sh/auxvec.h b/include/asm-sh/auxvec.h new file mode 100644 index 000000000000..fc21e4db5881 --- /dev/null +++ b/include/asm-sh/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SH_AUXVEC_H +#define __ASM_SH_AUXVEC_H + +#endif /* __ASM_SH_AUXVEC_H */ diff --git a/include/asm-sh64/auxvec.h b/include/asm-sh64/auxvec.h new file mode 100644 index 000000000000..1ad5a44bdc76 --- /dev/null +++ b/include/asm-sh64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SH64_AUXVEC_H +#define __ASM_SH64_AUXVEC_H + +#endif /* __ASM_SH64_AUXVEC_H */ diff --git a/include/asm-sparc/auxvec.h b/include/asm-sparc/auxvec.h new file mode 100644 index 000000000000..ad6f360261f6 --- /dev/null +++ b/include/asm-sparc/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMSPARC_AUXVEC_H +#define __ASMSPARC_AUXVEC_H + +#endif /* !(__ASMSPARC_AUXVEC_H) */ diff --git a/include/asm-sparc64/auxvec.h b/include/asm-sparc64/auxvec.h new file mode 100644 index 000000000000..436a29129828 --- /dev/null +++ b/include/asm-sparc64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SPARC64_AUXVEC_H +#define __ASM_SPARC64_AUXVEC_H + +#endif /* !(__ASM_SPARC64_AUXVEC_H) */ diff --git a/include/asm-um/auxvec.h b/include/asm-um/auxvec.h new file mode 100644 index 000000000000..1e5e1c2fc9b1 --- /dev/null +++ b/include/asm-um/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __UM_AUXVEC_H +#define __UM_AUXVEC_H + +#endif diff --git a/include/asm-v850/auxvec.h b/include/asm-v850/auxvec.h new file mode 100644 index 000000000000..f493232d0224 --- /dev/null +++ b/include/asm-v850/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __V850_AUXVEC_H__ +#define __V850_AUXVEC_H__ + +#endif /* __V850_AUXVEC_H__ */ diff --git a/include/asm-x86_64/auxvec.h b/include/asm-x86_64/auxvec.h new file mode 100644 index 000000000000..2403c4cfced2 --- /dev/null +++ b/include/asm-x86_64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_X86_64_AUXVEC_H +#define __ASM_X86_64_AUXVEC_H + +#endif diff --git a/include/asm-xtensa/auxvec.h b/include/asm-xtensa/auxvec.h new file mode 100644 index 000000000000..257dec75c5af --- /dev/null +++ b/include/asm-xtensa/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __XTENSA_AUXVEC_H +#define __XTENSA_AUXVEC_H + +#endif diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h new file mode 100644 index 000000000000..9a7b374c9fb4 --- /dev/null +++ b/include/linux/auxvec.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_AUXVEC_H +#define _LINUX_AUXVEC_H + +#include + +/* Symbolic values for the entries in the auxiliary table + put on the initial stack */ +#define AT_NULL 0 /* end of vector */ +#define AT_IGNORE 1 /* entry should be ignored */ +#define AT_EXECFD 2 /* file descriptor of program */ +#define AT_PHDR 3 /* program headers for program */ +#define AT_PHENT 4 /* size of program header entry */ +#define AT_PHNUM 5 /* number of program headers */ +#define AT_PAGESZ 6 /* system page size */ +#define AT_BASE 7 /* base address of interpreter */ +#define AT_FLAGS 8 /* flags */ +#define AT_ENTRY 9 /* entry point of program */ +#define AT_NOTELF 10 /* program is not ELF */ +#define AT_UID 11 /* real uid */ +#define AT_EUID 12 /* effective uid */ +#define AT_GID 13 /* real gid */ +#define AT_EGID 14 /* 
effective gid */ +#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_CLKTCK 17 /* frequency at which times() increments */ + +#define AT_SECURE 23 /* secure mode boolean */ + +#define AT_VECTOR_SIZE 42 /* Size of auxiliary table. */ + +#endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/elf.h b/include/linux/elf.h index f5b3ba5a317d..ff955dbf510d 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -2,6 +2,7 @@ #define _LINUX_ELF_H #include +#include #include #ifndef elf_read_implies_exec @@ -158,29 +159,6 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -/* Symbolic values for the entries in the auxiliary table - put on the initial stack */ -#define AT_NULL 0 /* end of vector */ -#define AT_IGNORE 1 /* entry should be ignored */ -#define AT_EXECFD 2 /* file descriptor of program */ -#define AT_PHDR 3 /* program headers for program */ -#define AT_PHENT 4 /* size of program header entry */ -#define AT_PHNUM 5 /* number of program headers */ -#define AT_PAGESZ 6 /* system page size */ -#define AT_BASE 7 /* base address of interpreter */ -#define AT_FLAGS 8 /* flags */ -#define AT_ENTRY 9 /* entry point of program */ -#define AT_NOTELF 10 /* program is not ELF */ -#define AT_UID 11 /* real uid */ -#define AT_EUID 12 /* effective uid */ -#define AT_GID 13 /* real gid */ -#define AT_EGID 14 /* effective gid */ -#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ -#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ -#define AT_CLKTCK 17 /* frequency at which times() increments */ - -#define AT_SECURE 23 /* secure mode boolean */ - typedef struct dynamic{ Elf32_Sword d_tag; union{ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5fb31bede103..b5a22ea80045 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,8 @@ #include #include +#include /* For AT_VECTOR_SIZE */ + struct exec_domain; /* @@ -261,7 +263,7 @@ struct mm_struct { mm_counter_t _rss; mm_counter_t _anon_rss; - unsigned long saved_auxv[42]; /* for /proc/PID/auxv */ + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ unsigned dumpable:2; cpumask_t cpu_vm_mask; -- cgit v1.2.3 From 9c1cfda20a508b181bdda8c0045f7c0c333880a5 Mon Sep 17 00:00:00 2001 From: John Hawkes Date: Tue, 6 Sep 2005 15:18:14 -0700 Subject: [PATCH] cpusets: Move the ia64 domain setup code to the generic code Signed-off-by: John Hawkes Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/domain.c | 444 ------------------------------------------- include/asm-ia64/processor.h | 3 - include/asm-ia64/topology.h | 23 --- include/linux/sched.h | 7 - include/linux/topology.h | 23 +++ kernel/sched.c | 290 ++++++++++++++++++++++------ 7 files changed, 260 insertions(+), 532 deletions(-) delete mode 100644 arch/ia64/kernel/domain.c (limited to 'include/linux/sched.h') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index b242594be55b..307514f7a282 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_NUMA) += numa.o 
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c deleted file mode 100644 index e907109983f1..000000000000 --- a/arch/ia64/kernel/domain.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * arch/ia64/kernel/domain.c - * Architecture specific sched-domains builder. - * - * Copyright (C) 2004 Jesse Barnes - * Copyright (C) 2004 Silicon Graphics, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define SD_NODES_PER_DOMAIN 16 - -#ifdef CONFIG_NUMA -/** - * find_next_best_node - find the next node to include in a sched_domain - * @node: node whose sched_domain we're building - * @used_nodes: nodes already in the sched_domain - * - * Find the next node to include in a given scheduling domain. Simply - * finds the closest node not already in the @used_nodes map. - * - * Should use nodemask_t. - */ -static int find_next_best_node(int node, unsigned long *used_nodes) -{ - int i, n, val, min_val, best_node = 0; - - min_val = INT_MAX; - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Start at @node */ - n = (node + i) % MAX_NUMNODES; - - if (!nr_cpus_node(n)) - continue; - - /* Skip already used nodes */ - if (test_bit(n, used_nodes)) - continue; - - /* Simple min distance search */ - val = node_distance(node, n); - - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - set_bit(best_node, used_nodes); - return best_node; -} - -/** - * sched_domain_node_span - get a cpumask for a node's sched_domain - * @node: node whose cpumask we're constructing - * @size: number of nodes to include in this span - * - * Given a node, construct a good cpumask for its sched_domain to span. It - * should be one that prevents unnecessary balancing, but also spreads tasks - * out optimally. - */ -static cpumask_t sched_domain_node_span(int node) -{ - int i; - cpumask_t span, nodemask; - DECLARE_BITMAP(used_nodes, MAX_NUMNODES); - - cpus_clear(span); - bitmap_zero(used_nodes, MAX_NUMNODES); - - nodemask = node_to_cpumask(node); - cpus_or(span, span, nodemask); - set_bit(node, used_nodes); - - for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { - int next_node = find_next_best_node(node, used_nodes); - nodemask = node_to_cpumask(next_node); - cpus_or(span, span, nodemask); - } - - return span; -} -#endif - -/* - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we - * can switch it on easily if needed. - */ -#ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static struct sched_group sched_group_cpus[NR_CPUS]; -static int cpu_to_cpu_group(int cpu) -{ - return cpu; -} -#endif - -static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static struct sched_group sched_group_phys[NR_CPUS]; -static int cpu_to_phys_group(int cpu) -{ -#ifdef CONFIG_SCHED_SMT - return first_cpu(cpu_sibling_map[cpu]); -#else - return cpu; -#endif -} - -#ifdef CONFIG_NUMA -/* - * The init_sched_build_groups can't handle what we want to do with node - * groups, so roll our own. Now each node has its own list of groups which - * gets dynamically allocated. 
- */ -static DEFINE_PER_CPU(struct sched_domain, node_domains); -static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; - -static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; - -static int cpu_to_allnodes_group(int cpu) -{ - return cpu_to_node(cpu); -} -#endif - -/* - * Build sched domains for a given set of cpus and attach the sched domains - * to the individual cpus - */ -void build_sched_domains(const cpumask_t *cpu_map) -{ - int i; -#ifdef CONFIG_NUMA - struct sched_group **sched_group_nodes = NULL; - struct sched_group *sched_group_allnodes = NULL; - - /* - * Allocate the per-node list of sched groups - */ - sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES, - GFP_ATOMIC); - if (!sched_group_nodes) { - printk(KERN_WARNING "Can not alloc sched group node list\n"); - return; - } - sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; -#endif - - /* - * Set up domains for cpus specified by the cpu_map. - */ - for_each_cpu_mask(i, *cpu_map) { - int group; - struct sched_domain *sd = NULL, *p; - cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - - cpus_and(nodemask, nodemask, *cpu_map); - -#ifdef CONFIG_NUMA - if (cpus_weight(*cpu_map) - > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { - if (!sched_group_allnodes) { - sched_group_allnodes - = kmalloc(sizeof(struct sched_group) - * MAX_NUMNODES, - GFP_KERNEL); - if (!sched_group_allnodes) { - printk(KERN_WARNING - "Can not alloc allnodes sched group\n"); - break; - } - sched_group_allnodes_bycpu[i] - = sched_group_allnodes; - } - sd = &per_cpu(allnodes_domains, i); - *sd = SD_ALLNODES_INIT; - sd->span = *cpu_map; - group = cpu_to_allnodes_group(i); - sd->groups = &sched_group_allnodes[group]; - p = sd; - } else - p = NULL; - - sd = &per_cpu(node_domains, i); - *sd = SD_NODE_INIT; - sd->span = sched_domain_node_span(cpu_to_node(i)); - sd->parent = p; - cpus_and(sd->span, sd->span, *cpu_map); -#endif - - p = sd; - sd = &per_cpu(phys_domains, i); - group = cpu_to_phys_group(i); - *sd = SD_CPU_INIT; - sd->span = nodemask; - sd->parent = p; - sd->groups = &sched_group_phys[group]; - -#ifdef CONFIG_SCHED_SMT - p = sd; - sd = &per_cpu(cpu_domains, i); - group = cpu_to_cpu_group(i); - *sd = SD_SIBLING_INIT; - sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, *cpu_map); - sd->parent = p; - sd->groups = &sched_group_cpus[group]; -#endif - } - -#ifdef CONFIG_SCHED_SMT - /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, *cpu_map) { - cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, *cpu_map); - if (i != first_cpu(this_sibling_map)) - continue; - - init_sched_build_groups(sched_group_cpus, this_sibling_map, - &cpu_to_cpu_group); - } -#endif - - /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - init_sched_build_groups(sched_group_phys, nodemask, - &cpu_to_phys_group); - } - -#ifdef CONFIG_NUMA - if (sched_group_allnodes) - init_sched_build_groups(sched_group_allnodes, *cpu_map, - &cpu_to_allnodes_group); - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Set up node groups */ - struct sched_group *sg, *prev; - cpumask_t nodemask = node_to_cpumask(i); - cpumask_t domainspan; - cpumask_t covered = CPU_MASK_NONE; - int j; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) { - sched_group_nodes[i] = NULL; - continue; 
- } - - domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, *cpu_map); - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - sched_group_nodes[i] = sg; - for_each_cpu_mask(j, nodemask) { - struct sched_domain *sd; - sd = &per_cpu(node_domains, j); - sd->groups = sg; - if (sd->groups == NULL) { - /* Turn off balancing if we have no groups */ - sd->flags = 0; - } - } - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", i); - continue; - } - sg->cpu_power = 0; - sg->cpumask = nodemask; - cpus_or(covered, covered, nodemask); - prev = sg; - - for (j = 0; j < MAX_NUMNODES; j++) { - cpumask_t tmp, notcovered; - int n = (i + j) % MAX_NUMNODES; - - cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, *cpu_map); - cpus_and(tmp, tmp, domainspan); - if (cpus_empty(tmp)) - break; - - nodemask = node_to_cpumask(n); - cpus_and(tmp, tmp, nodemask); - if (cpus_empty(tmp)) - continue; - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", j); - break; - } - sg->cpu_power = 0; - sg->cpumask = tmp; - cpus_or(covered, covered, tmp); - prev->next = sg; - prev = sg; - } - prev->next = sched_group_nodes[i]; - } -#endif - - /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, *cpu_map) { - int power; - struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); - power = SCHED_LOAD_SCALE; - sd->groups->cpu_power = power; -#endif - - sd = &per_cpu(phys_domains, i); - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - -#ifdef CONFIG_NUMA - sd = &per_cpu(allnodes_domains, i); - if (sd->groups) { - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - } -#endif - } - -#ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) { - struct sched_group *sg = sched_group_nodes[i]; - int j; - - if (sg == NULL) - continue; -next_sg: - for_each_cpu_mask(j, sg->cpumask) { - struct sched_domain *sd; - int power; - - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { - /* - * Only add "power" once for each - * physical package. - */ - continue; - } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - - sg->cpu_power += power; - } - sg = sg->next; - if (sg != sched_group_nodes[i]) - goto next_sg; - } -#endif - - /* Attach the domains */ - for_each_cpu_mask(i, *cpu_map) { - struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); -#else - sd = &per_cpu(phys_domains, i); -#endif - cpu_attach_domain(sd, i); - } -} -/* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. - */ -void arch_init_sched_domains(const cpumask_t *cpu_map) -{ - cpumask_t cpu_default_map; - - /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. 
- */ - cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); - - build_sched_domains(&cpu_default_map); -} - -void arch_destroy_sched_domains(const cpumask_t *cpu_map) -{ -#ifdef CONFIG_NUMA - int i; - int cpu; - - for_each_cpu_mask(cpu, *cpu_map) { - struct sched_group *sched_group_allnodes - = sched_group_allnodes_bycpu[cpu]; - struct sched_group **sched_group_nodes - = sched_group_nodes_bycpu[cpu]; - - if (sched_group_allnodes) { - kfree(sched_group_allnodes); - sched_group_allnodes_bycpu[cpu] = NULL; - } - - if (!sched_group_nodes) - continue; - - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - struct sched_group *oldsg, *sg = sched_group_nodes[i]; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - if (sg == NULL) - continue; - sg = sg->next; -next_sg: - oldsg = sg; - sg = sg->next; - kfree(oldsg); - if (oldsg != sched_group_nodes[i]) - goto next_sg; - } - kfree(sched_group_nodes); - sched_group_nodes_bycpu[cpu] = NULL; - } -#endif -} diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 91bbd1f22461..94e07e727395 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -20,9 +20,6 @@ #include #include -/* Our arch specific arch_init_sched_domain is in arch/ia64/kernel/domain.c */ -#define ARCH_HAS_SCHED_DOMAIN - #define IA64_NUM_DBG_REGS 8 /* * Limits for PMC and PMD are set to less than maximum architected values diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index 399bc29729fd..a9f738bf18a7 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -98,29 +98,6 @@ void build_cpu_to_node_map(void); .nr_balance_failed = 0, \ } -/* sched_domains SD_ALLNODES_INIT for IA64 NUMA machines */ -#define SD_ALLNODES_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 64, \ - .max_interval = 64*num_online_cpus(), \ - .busy_factor = 128, \ - .imbalance_pct = 133, \ - .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 3, \ - .newidle_idx = 0, /* unused */ \ - .wake_idx = 0, /* unused */ \ - .forkexec_idx = 0, /* unused */ \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 64, \ - .nr_balance_failed = 0, \ -} - #endif /* CONFIG_NUMA */ #include diff --git a/include/linux/sched.h b/include/linux/sched.h index b5a22ea80045..ea1b5f32ec5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -564,13 +564,6 @@ struct sched_domain { extern void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2); -#ifdef ARCH_HAS_SCHED_DOMAIN -/* Useful helpers that arch setup code may use. 
Defined in kernel/sched.c */ -extern cpumask_t cpu_isolated_map; -extern void init_sched_build_groups(struct sched_group groups[], - cpumask_t span, int (*group_fn)(int cpu)); -extern void cpu_attach_domain(struct sched_domain *sd, int cpu); -#endif /* ARCH_HAS_SCHED_DOMAIN */ #endif /* CONFIG_SMP */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 0320225e96da..3df1d474e5c5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -135,6 +135,29 @@ } #endif +/* sched_domains SD_ALLNODES_INIT for NUMA machines */ +#define SD_ALLNODES_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, \ + .imbalance_pct = 133, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 64, \ + .nr_balance_failed = 0, \ +} + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index 5f889d0cbfcc..50860ad5b624 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4779,7 +4779,7 @@ static int sd_parent_degenerate(struct sched_domain *sd, * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. */ -void cpu_attach_domain(struct sched_domain *sd, int cpu) +static void cpu_attach_domain(struct sched_domain *sd, int cpu) { runqueue_t *rq = cpu_rq(cpu); struct sched_domain *tmp; @@ -4802,7 +4802,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu) } /* cpus with isolated domains */ -cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; +static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) @@ -4830,8 +4830,8 @@ __setup ("isolcpus=", isolated_cpu_setup); * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ -void init_sched_build_groups(struct sched_group groups[], - cpumask_t span, int (*group_fn)(int cpu)) +static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, + int (*group_fn)(int cpu)) { struct sched_group *first = NULL, *last = NULL; cpumask_t covered = CPU_MASK_NONE; @@ -4864,12 +4864,85 @@ void init_sched_build_groups(struct sched_group groups[], last->next = first; } +#define SD_NODES_PER_DOMAIN 16 -#ifdef ARCH_HAS_SCHED_DOMAIN -extern void build_sched_domains(const cpumask_t *cpu_map); -extern void arch_init_sched_domains(const cpumask_t *cpu_map); -extern void arch_destroy_sched_domains(const cpumask_t *cpu_map); -#else +#ifdef CONFIG_NUMA +/** + * find_next_best_node - find the next node to include in a sched_domain + * @node: node whose sched_domain we're building + * @used_nodes: nodes already in the sched_domain + * + * Find the next node to include in a given scheduling domain. Simply + * finds the closest node not already in the @used_nodes map. + * + * Should use nodemask_t. 
+ */ +static int find_next_best_node(int node, unsigned long *used_nodes) +{ + int i, n, val, min_val, best_node = 0; + + min_val = INT_MAX; + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Start at @node */ + n = (node + i) % MAX_NUMNODES; + + if (!nr_cpus_node(n)) + continue; + + /* Skip already used nodes */ + if (test_bit(n, used_nodes)) + continue; + + /* Simple min distance search */ + val = node_distance(node, n); + + if (val < min_val) { + min_val = val; + best_node = n; + } + } + + set_bit(best_node, used_nodes); + return best_node; +} + +/** + * sched_domain_node_span - get a cpumask for a node's sched_domain + * @node: node whose cpumask we're constructing + * @size: number of nodes to include in this span + * + * Given a node, construct a good cpumask for its sched_domain to span. It + * should be one that prevents unnecessary balancing, but also spreads tasks + * out optimally. + */ +static cpumask_t sched_domain_node_span(int node) +{ + int i; + cpumask_t span, nodemask; + DECLARE_BITMAP(used_nodes, MAX_NUMNODES); + + cpus_clear(span); + bitmap_zero(used_nodes, MAX_NUMNODES); + + nodemask = node_to_cpumask(node); + cpus_or(span, span, nodemask); + set_bit(node, used_nodes); + + for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { + int next_node = find_next_best_node(node, used_nodes); + nodemask = node_to_cpumask(next_node); + cpus_or(span, span, nodemask); + } + + return span; +} +#endif + +/* + * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we + * can switch it on easily if needed. + */ #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; @@ -4891,36 +4964,20 @@ static int cpu_to_phys_group(int cpu) } #ifdef CONFIG_NUMA - -static DEFINE_PER_CPU(struct sched_domain, node_domains); -static struct sched_group sched_group_nodes[MAX_NUMNODES]; -static int cpu_to_node_group(int cpu) -{ - return cpu_to_node(cpu); -} -#endif - -#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) /* - * The domains setup code relies on siblings not spanning - * multiple nodes. Make sure the architecture has a proper - * siblings map: + * The init_sched_build_groups can't handle what we want to do with node + * groups, so roll our own. Now each node has its own list of groups which + * gets dynamically allocated. 
*/ -static void check_sibling_maps(void) -{ - int i, j; +static DEFINE_PER_CPU(struct sched_domain, node_domains); +static struct sched_group *sched_group_nodes[MAX_NUMNODES]; - for_each_online_cpu(i) { - for_each_cpu_mask(j, cpu_sibling_map[i]) { - if (cpu_to_node(i) != cpu_to_node(j)) { - printk(KERN_INFO "warning: CPU %d siblings map " - "to different node - isolating " - "them.\n", i); - cpu_sibling_map[i] = cpumask_of_cpu(i); - break; - } - } - } +static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); +static struct sched_group sched_group_allnodes[MAX_NUMNODES]; + +static int cpu_to_allnodes_group(int cpu) +{ + return cpu_to_node(cpu); } #endif @@ -4928,7 +4985,7 @@ static void check_sibling_maps(void) * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static void build_sched_domains(const cpumask_t *cpu_map) +void build_sched_domains(const cpumask_t *cpu_map) { int i; @@ -4943,11 +5000,22 @@ static void build_sched_domains(const cpumask_t *cpu_map) cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA + if (num_online_cpus() + > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { + sd = &per_cpu(allnodes_domains, i); + *sd = SD_ALLNODES_INIT; + sd->span = *cpu_map; + group = cpu_to_allnodes_group(i); + sd->groups = &sched_group_allnodes[group]; + p = sd; + } else + p = NULL; + sd = &per_cpu(node_domains, i); - group = cpu_to_node_group(i); *sd = SD_NODE_INIT; - sd->span = *cpu_map; - sd->groups = &sched_group_nodes[group]; + sd->span = sched_domain_node_span(cpu_to_node(i)); + sd->parent = p; + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; @@ -4972,7 +5040,7 @@ static void build_sched_domains(const cpumask_t *cpu_map) #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_online_cpu(i) { + for_each_cpu_mask(i, *cpu_map) { cpumask_t this_sibling_map = cpu_sibling_map[i]; cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) @@ -4997,8 +5065,74 @@ static void build_sched_domains(const cpumask_t *cpu_map) #ifdef CONFIG_NUMA /* Set up node groups */ - init_sched_build_groups(sched_group_nodes, *cpu_map, - &cpu_to_node_group); + init_sched_build_groups(sched_group_allnodes, *cpu_map, + &cpu_to_allnodes_group); + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Set up node groups */ + struct sched_group *sg, *prev; + cpumask_t nodemask = node_to_cpumask(i); + cpumask_t domainspan; + cpumask_t covered = CPU_MASK_NONE; + int j; + + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + + domainspan = sched_domain_node_span(i); + cpus_and(domainspan, domainspan, *cpu_map); + + sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); + sched_group_nodes[i] = sg; + for_each_cpu_mask(j, nodemask) { + struct sched_domain *sd; + sd = &per_cpu(node_domains, j); + sd->groups = sg; + if (sd->groups == NULL) { + /* Turn off balancing if we have no groups */ + sd->flags = 0; + } + } + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", i); + continue; + } + sg->cpu_power = 0; + sg->cpumask = nodemask; + cpus_or(covered, covered, nodemask); + prev = sg; + + for (j = 0; j < MAX_NUMNODES; j++) { + cpumask_t tmp, notcovered; + int n = (i + j) % MAX_NUMNODES; + + cpus_complement(notcovered, covered); + cpus_and(tmp, notcovered, *cpu_map); + cpus_and(tmp, tmp, domainspan); + if (cpus_empty(tmp)) + break; + + nodemask = node_to_cpumask(n); + cpus_and(tmp, tmp, nodemask); + if (cpus_empty(tmp)) + continue; + + sg = kmalloc(sizeof(struct 
sched_group), GFP_KERNEL); + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", j); + break; + } + sg->cpu_power = 0; + sg->cpumask = tmp; + cpus_or(covered, covered, tmp); + prev->next = sg; + prev = sg; + } + prev->next = sched_group_nodes[i]; + } #endif /* Calculate CPU power for physical packages and nodes */ @@ -5017,14 +5151,46 @@ static void build_sched_domains(const cpumask_t *cpu_map) sd->groups->cpu_power = power; #ifdef CONFIG_NUMA - if (i == first_cpu(sd->groups->cpumask)) { - /* Only add "power" once for each physical package. */ - sd = &per_cpu(node_domains, i); - sd->groups->cpu_power += power; + sd = &per_cpu(allnodes_domains, i); + if (sd->groups) { + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + sd->groups->cpu_power = power; } #endif } +#ifdef CONFIG_NUMA + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *sg = sched_group_nodes[i]; + int j; + + if (sg == NULL) + continue; +next_sg: + for_each_cpu_mask(j, sg->cpumask) { + struct sched_domain *sd; + int power; + + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { + /* + * Only add "power" once for each + * physical package. + */ + continue; + } + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + + sg->cpu_power += power; + } + sg = sg->next; + if (sg != sched_group_nodes[i]) + goto next_sg; + } +#endif + /* Attach the domains */ for_each_cpu_mask(i, *cpu_map) { struct sched_domain *sd; @@ -5039,13 +5205,10 @@ static void build_sched_domains(const cpumask_t *cpu_map) /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. */ -static void arch_init_sched_domains(cpumask_t *cpu_map) +static void arch_init_sched_domains(const cpumask_t *cpu_map) { cpumask_t cpu_default_map; -#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) - check_sibling_maps(); -#endif /* * Setup mask for cpus without special case scheduling requirements. * For now this just excludes isolated cpus, but could be used to @@ -5058,10 +5221,29 @@ static void arch_init_sched_domains(cpumask_t *cpu_map) static void arch_destroy_sched_domains(const cpumask_t *cpu_map) { - /* Do nothing: everything is statically allocated. */ -} +#ifdef CONFIG_NUMA + int i; + for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); + struct sched_group *oldsg, *sg = sched_group_nodes[i]; -#endif /* ARCH_HAS_SCHED_DOMAIN */ + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + + if (sg == NULL) + continue; + sg = sg->next; +next_sg: + oldsg = sg; + sg = sg->next; + kfree(oldsg); + if (oldsg != sched_group_nodes[i]) + goto next_sg; + sched_group_nodes[i] = NULL; + } +#endif +} /* * Detach sched domains from a group of cpus specified in cpu_map -- cgit v1.2.3
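To make the soft-lockup mechanism from the first patch above more concrete: the kernel-side implementation is not shown in this diff (only the declarations in sched.h are), but the idea can be sketched as a user-space analogue, with a pthread standing in for the per-CPU watchdog task and a polling loop standing in for the timer-interrupt callback. The names and the 10-second threshold mirror the changelog; everything else here is illustrative only, not the kernel code.

#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static volatile time_t touch_timestamp;	/* last time the watchdog ran */
static volatile int print_once = 1;

/* Analogue of touch_softlockup_watchdog(): note that progress was made. */
static void touch_watchdog(void)
{
	touch_timestamp = time(NULL);
}

/* Analogue of the per-CPU watchdog thread: wake up once per second and
 * refresh the timestamp.  If it cannot run, the timestamp goes stale. */
static void *watchdog_thread(void *unused)
{
	(void)unused;
	for (;;) {
		touch_watchdog();
		sleep(1);
	}
	return NULL;
}

/* Analogue of softlockup_tick(), which the kernel calls from the timer
 * interrupt: warn once if the watchdog has not run for over 10 seconds. */
static void softlockup_tick(void)
{
	if (print_once && time(NULL) - touch_timestamp > 10) {
		fprintf(stderr, "BUG: soft lockup detected!\n");
		print_once = 0;
	}
}

int main(void)
{
	pthread_t tid;

	touch_watchdog();
	pthread_create(&tid, NULL, watchdog_thread, NULL);
	for (;;) {
		softlockup_tick();
		usleep(100 * 1000);	/* poll at 10 Hz */
	}
}

Built with cc -pthread, this never warns unless the watchdog thread is kept from running for 10 seconds (for example by stopping it with SIGSTOP), which plays the role of a CPU stuck in kernel mode; like the real feature, it only reports the condition and does not try to recover from it.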