From 6ac5c5310ca9d7dd3d7e677c2715b1f06a348330 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:58:30 +1030 Subject: cpumask: Use modern cpumask style in arch/x86/kernel/cpu/mcheck/mce-inject.c Note that there's no freeing the cpu var, since this module has no unload function. Signed-off-by: Rusty Russell Cc: Andi Kleen Cc: Huang Ying LKML-Reference: <200911031458.30987.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-inject.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 472763d92098..73734baa50f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -74,7 +74,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) m->finished = 0; } -static cpumask_t mce_inject_cpumask; +static cpumask_var_t mce_inject_cpumask; static int mce_raise_notify(struct notifier_block *self, unsigned long val, void *data) @@ -82,9 +82,9 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) + if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; - cpu_clear(cpu, mce_inject_cpumask); + cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) raise_exception(m, args->regs); else if (m->status) @@ -148,22 +148,22 @@ static void raise_mce(struct mce *m) unsigned long start; int cpu; get_online_cpus(); - mce_inject_cpumask = cpu_online_map; - cpu_clear(get_cpu(), mce_inject_cpumask); + cpumask_copy(mce_inject_cpumask, cpu_online_mask); + cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); for_each_online_cpu(cpu) { struct mce *mcpu = &per_cpu(injectm, cpu); if (!mcpu->finished || MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) - cpu_clear(cpu, mce_inject_cpumask); + cpumask_clear_cpu(cpu, mce_inject_cpumask); } - if (!cpus_empty(mce_inject_cpumask)) - apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); + if (!cpumask_empty(mce_inject_cpumask)) + apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); start = jiffies; - while (!cpus_empty(mce_inject_cpumask)) { + while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { printk(KERN_ERR "Timeout waiting for mce inject NMI %lx\n", - *cpus_addr(mce_inject_cpumask)); + *cpumask_bits(mce_inject_cpumask)); break; } cpu_relax(); @@ -210,6 +210,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, static int inject_init(void) { + if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) + return -ENOMEM; printk(KERN_INFO "Machine check injector initialized\n"); mce_chrdev_ops.write = mce_write; register_die_notifier(&mce_raise_nb); -- cgit v1.2.3 From e258e4e0b495e6ecbd073d6bef1eafb62a58919a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:51 -0500 Subject: x86-32: Add new pt_regs stubs Add new stubs which add the pt_regs pointer as the last arg, matching 64-bit. This will allow these syscalls to be easily merged. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 49 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 50b9c220e121..34dbfa909dd7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -725,22 +725,49 @@ END(syscall_badsys) /* * System calls that need a pt_regs pointer. */ -#define PTREGSCALL(name) \ +#define PTREGSCALL0(name) \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%eax; \ jmp sys_##name; -PTREGSCALL(iopl) -PTREGSCALL(fork) -PTREGSCALL(clone) -PTREGSCALL(vfork) -PTREGSCALL(execve) -PTREGSCALL(sigaltstack) -PTREGSCALL(sigreturn) -PTREGSCALL(rt_sigreturn) -PTREGSCALL(vm86) -PTREGSCALL(vm86old) +#define PTREGSCALL1(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%edx; \ + movl PT_EBX(%edx),%eax; \ + jmp sys_##name; + +#define PTREGSCALL2(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%ecx; \ + movl PT_ECX(%ecx),%edx; \ + movl PT_EBX(%ecx),%eax; \ + jmp sys_##name; + +#define PTREGSCALL3(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%eax; \ + pushl %eax; \ + movl PT_EDX(%eax),%ecx; \ + movl PT_ECX(%eax),%edx; \ + movl PT_EBX(%eax),%eax; \ + call sys_##name; \ + addl $4,%esp; \ + ret + +PTREGSCALL0(iopl) +PTREGSCALL0(fork) +PTREGSCALL0(clone) +PTREGSCALL0(vfork) +PTREGSCALL0(execve) +PTREGSCALL0(sigaltstack) +PTREGSCALL0(sigreturn) +PTREGSCALL0(rt_sigreturn) +PTREGSCALL0(vm86) +PTREGSCALL0(vm86old) .macro FIXUP_ESPFIX_STACK /* -- cgit v1.2.3 From 27f59559d63375a4d59e7c720a439d9f0b47edad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:52 -0500 Subject: x86: Merge sys_iopl Change 32-bit sys_iopl to PTREGSCALL1, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 6 +----- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/ioport.c | 28 +++++----------------------- 3 files changed, 7 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76edd63f..4b694cd904c4 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -18,6 +18,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); @@ -35,8 +36,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 -/* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); /* kernel/process_32.c */ int sys_clone(struct pt_regs *); @@ -70,9 +69,6 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - /* kernel/process_64.c */ asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 34dbfa909dd7..ab7fcef37453 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -758,7 +758,7 @@ ptregs_##name: \ addl $4,%esp; \ ret -PTREGSCALL0(iopl) +PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d308f16b..85ecc7c57ba6 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * on system-call entry - see also fork() and the signal handling * code. */ -static int do_iopl(unsigned int level, struct pt_regs *regs) +long sys_iopl(unsigned int level, struct pt_regs *regs) { unsigned int old = (regs->flags >> 12) & 3; + struct thread_struct *t = ¤t->thread; if (level > 3) return -EINVAL; @@ -115,29 +116,10 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) return -EPERM; } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); - - return 0; -} - #ifdef CONFIG_X86_32 -long sys_iopl(struct pt_regs *regs) -{ - unsigned int level = regs->bx; - struct thread_struct *t = ¤t->thread; - int rc; - - rc = do_iopl(level, regs); - if (rc < 0) - goto out; - t->iopl = level << 12; set_iopl_mask(t->iopl); -out: - return rc; -} -#else -asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) -{ - return do_iopl(level, regs); -} #endif + + return 0; +} -- cgit v1.2.3 From 11cf88bd0b8165b65aaabaee0977e9a3ad474ab7 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:53 -0500 Subject: x86: Merge sys_execve Change 32-bit sys_execve to PTREGSCALL3, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 6 ++---- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/process.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 25 ------------------------- arch/x86/kernel/process_64.c | 19 ------------------- 5 files changed, 29 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 4b694cd904c4..48c48e508b9f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,6 +23,8 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); +long sys_execve(char __user *, char __user * __user *, + char __user * __user *, struct pt_regs *); /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); @@ -39,7 +41,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* kernel/process_32.c */ int sys_clone(struct pt_regs *); -int sys_execve(struct pt_regs *); /* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); @@ -73,9 +74,6 @@ int sys_vm86(struct pt_regs *); asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -asmlinkage long sys_execve(char __user *, char __user * __user *, - char __user * __user *, - struct pt_regs *); long sys_arch_prctl(int, unsigned long); /* kernel/signal.c */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index ab7fcef37453..a96a0d8a0fdb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -762,7 +762,7 @@ PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) -PTREGSCALL0(execve) +PTREGSCALL3(execve) PTREGSCALL0(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e2ba634ea15..bb17bd9334fb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -235,6 +235,32 @@ int sys_vfork(struct pt_regs *regs) } +/* + * sys_execve() executes a new program. + */ +long sys_execve(char __user *name, char __user * __user *argv, + char __user * __user *envp, struct pt_regs *regs) +{ + long error; + char *filename; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + error = do_execve(filename, argv, envp, regs); + +#ifdef CONFIG_X86_32 + if (error == 0) { + /* Make sure we don't return using sysenter.. */ + set_thread_flag(TIF_IRET); + } +#endif + + putname(filename); + return error; +} + /* * Idle related variables and functions */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 075580b35682..486e38e2900b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -451,31 +451,6 @@ int sys_clone(struct pt_regs *regs) return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } -/* - * sys_execve() executes a new program. - */ -int sys_execve(struct pt_regs *regs) -{ - int error; - char *filename; - - filename = getname((char __user *) regs->bx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, - (char __user * __user *) regs->cx, - (char __user * __user *) regs->dx, - regs); - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } - putname(filename); -out: - return error; -} - #define top_esp (THREAD_SIZE - sizeof(unsigned long)) #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c95c8f4e790a..671960d82587 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -520,25 +520,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -/* - * sys_execve() executes a new program. - */ -asmlinkage -long sys_execve(char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = do_execve(filename, argv, envp, regs); - putname(filename); - return error; -} - void set_personality_64bit(void) { /* inherit personality from parent */ -- cgit v1.2.3 From 052acad48a566a6dbcccb95e5d22e5e1b7cac8dd Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:54 -0500 Subject: x86: Merge sys_sigaltstack Change 32-bit sys_sigaltstack to PTREGSCALL2, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 8 +++----- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/signal.c | 12 +----------- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 48c48e508b9f..94e0b61fb040 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -31,6 +31,9 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ long sys_rt_sigreturn(struct pt_regs *); +long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); + /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); @@ -46,7 +49,6 @@ int sys_clone(struct pt_regs *); asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); /* kernel/sys_i386_32.c */ @@ -76,10 +78,6 @@ asmlinkage long sys_clone(unsigned long, unsigned long, struct pt_regs *); long sys_arch_prctl(int, unsigned long); -/* kernel/signal.c */ -asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, - struct pt_regs *); - /* kernel/sys_x86_64.c */ struct new_utsname; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a96a0d8a0fdb..621ef4599416 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -763,7 +763,7 @@ PTREGSCALL0(fork) PTREGSCALL0(clone) PTREGSCALL0(vfork) PTREGSCALL3(execve) -PTREGSCALL0(sigaltstack) +PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) PTREGSCALL0(vm86) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 74fe6d86dc5d..4fd173cd8e57 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -545,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, } #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_32 -int sys_sigaltstack(struct pt_regs *regs) -{ - const stack_t __user *uss = (const stack_t __user *)regs->bx; - stack_t __user *uoss = (stack_t __user *)regs->cx; - - return do_sigaltstack(uss, uoss, regs->sp); -} -#else /* !CONFIG_X86_32 */ -asmlinkage long +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) { return do_sigaltstack(uss, uoss, regs->sp); } -#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. -- cgit v1.2.3 From f1382f157fb1175bba008abad0907310a1e459ce Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:55 -0500 Subject: x86, 32-bit: Convert sys_vm86 & sys_vm86old Convert these to new PTREGSCALL stubs. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 4 ++-- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/vm86_32.c | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 94e0b61fb040..df2c51106565 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -66,8 +66,8 @@ asmlinkage int sys_uname(struct old_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -int sys_vm86old(struct pt_regs *); -int sys_vm86(struct pt_regs *); +int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); +int sys_vm86(unsigned long, unsigned long, struct pt_regs *); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 621ef4599416..6c2f25d9b9d5 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -766,8 +766,8 @@ PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) -PTREGSCALL0(vm86) -PTREGSCALL0(vm86old) +PTREGSCALL2(vm86) +PTREGSCALL1(vm86old) .macro FIXUP_ESPFIX_STACK /* diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e62539058..5ffb5622f793 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,9 +197,8 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -int sys_vm86old(struct pt_regs *regs) +int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) { - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -227,7 +226,7 @@ out: } -int sys_vm86(struct pt_regs *regs) +int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs) struct vm86plus_struct __user *v86; tsk = current; - switch (regs->bx) { + switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); + ret = do_vm86_irq_handling(cmd, (int)arg); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs) ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)regs->cx; + v86 = (struct vm86plus_struct __user *)arg; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); -- cgit v1.2.3 From f839bbc5c81b1c92ff8e81c360e9564f7b961b2e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 19:01:56 -0500 Subject: x86: Merge sys_clone Change 32-bit sys_clone to new PTREGSCALL stub, and merge with 64-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260403316-5679-7-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 8 ++------ arch/x86/kernel/entry_32.S | 14 +++++++++++++- arch/x86/kernel/process.c | 9 +++++++++ arch/x86/kernel/process_32.c | 15 --------------- arch/x86/kernel/process_64.c | 9 --------- 5 files changed, 24 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index df2c51106565..b0ce78061708 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -25,6 +25,8 @@ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); +long sys_clone(unsigned long, unsigned long, void __user *, + void __user *, struct pt_regs *); /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); @@ -42,9 +44,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 -/* kernel/process_32.c */ -int sys_clone(struct pt_regs *); - /* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, @@ -73,9 +72,6 @@ int sys_vm86(unsigned long, unsigned long, struct pt_regs *); /* X86_64 only */ /* kernel/process_64.c */ -asmlinkage long sys_clone(unsigned long, unsigned long, - void __user *, void __user *, - struct pt_regs *); long sys_arch_prctl(int, unsigned long); /* kernel/sys_x86_64.c */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6c2f25d9b9d5..6492555d123d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -760,7 +760,6 @@ ptregs_##name: \ PTREGSCALL1(iopl) PTREGSCALL0(fork) -PTREGSCALL0(clone) PTREGSCALL0(vfork) PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) @@ -769,6 +768,19 @@ PTREGSCALL0(rt_sigreturn) PTREGSCALL2(vm86) PTREGSCALL1(vm86old) +/* Clone is an oddball. The 4th arg is in %edi */ + ALIGN; +ptregs_clone: + leal 4(%esp),%eax + pushl %eax + pushl PT_EDI(%eax) + movl PT_EDX(%eax),%ecx + movl PT_ECX(%eax),%edx + movl PT_EBX(%eax),%eax + call sys_clone + addl $8,%esp + ret + .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bb17bd9334fb..f3c1a6b3a65e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -234,6 +234,15 @@ int sys_vfork(struct pt_regs *regs) NULL, NULL); } +long +sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) +{ + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); +} + /* * sys_execve() executes a new program. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 486e38e2900b..506d5a7ba17c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -436,21 +436,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -int sys_clone(struct pt_regs *regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs->bx; - newsp = regs->cx; - parent_tidptr = (int __user *)regs->dx; - child_tidptr = (int __user *)regs->di; - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); -} - #define top_esp (THREAD_SIZE - sizeof(unsigned long)) #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 671960d82587..83019f94b83d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -534,15 +534,6 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -asmlinkage long -sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) -{ - if (!newsp) - newsp = regs->sp; - return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); -} - unsigned long get_wchan(struct task_struct *p) { unsigned long stack; -- cgit v1.2.3 From ce9119ad90b1caba550447bfcc0a21850558ca49 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 9 Dec 2009 16:33:44 -0800 Subject: x86-32: Avoid pipeline serialization in PTREGSCALL1 and 2 In the PTREGSCALL1 and 2 macros, we can trivially avoid an unnecessary pipeline serialization, so do so. In PTREGSCALLS3 this is much less clear-cut since we have to push a new value to the stack. Leave it alone for now assuming it is as good as it is going to be; may want to check on Atom or another in-order x86 to see if we can do better. Signed-off-by: H. Peter Anvin Cc: Brian Gerst LKML-Reference: <1260403316-5679-2-git-send-email-brgerst@gmail.com> --- arch/x86/kernel/entry_32.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6492555d123d..cb12b9bfc9cc 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -735,15 +735,15 @@ ptregs_##name: \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%edx; \ - movl PT_EBX(%edx),%eax; \ + movl (PT_EBX+4)(%esp),%eax; \ jmp sys_##name; #define PTREGSCALL2(name) \ ALIGN; \ ptregs_##name: \ leal 4(%esp),%ecx; \ - movl PT_ECX(%ecx),%edx; \ - movl PT_EBX(%ecx),%eax; \ + movl (PT_ECX+4)(%esp),%edx; \ + movl (PT_EBX+4)(%esp),%eax; \ jmp sys_##name; #define PTREGSCALL3(name) \ -- cgit v1.2.3 From fc380ceed7fe469728ea4acdbda4495ea943ee1c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 9 Dec 2009 16:54:08 -0800 Subject: x86-64, paravirt: Call set_iopl_mask() on 64 bits set_iopl_mask() is a no-op on 64 bits, but it is also a paravirt hook, so call it even on 64 bits. Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Brian Gerst LKML-Reference: <1260403316-5679-3-git-send-email-brgerst@gmail.com> --- arch/x86/kernel/ioport.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 85ecc7c57ba6..8eec0ec59af2 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -116,10 +116,8 @@ long sys_iopl(unsigned int level, struct pt_regs *regs) return -EPERM; } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); -#ifdef CONFIG_X86_32 t->iopl = level << 12; set_iopl_mask(t->iopl); -#endif return 0; } -- cgit v1.2.3 From 3bd95dfb182969dc6d2a317c150e0df7107608d3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:40 -0500 Subject: x86, 64-bit: Move kernel_thread to C Prepare for merging with 32-bit. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 49 +++------------------------------------- arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++-- arch/x86/kernel/x8664_ksyms_64.c | 2 -- 3 files changed, 32 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 63bca794c8f9..73d9b2c0e217 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,63 +1166,20 @@ bad_gs: jmp 2b .previous -/* - * Create a kernel thread. - * - * C extern interface: - * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) - * - * asm input arguments: - * rdi: fn, rsi: arg, rdx: flags - */ -ENTRY(kernel_thread) - CFI_STARTPROC - FAKE_STACK_FRAME $child_rip - SAVE_ALL - - # rdi: flags, rsi: usp, rdx: will be &pt_regs - movq %rdx,%rdi - orq kernel_thread_flags(%rip),%rdi - movq $-1, %rsi - movq %rsp, %rdx - - xorl %r8d,%r8d - xorl %r9d,%r9d - - # clone now - call do_fork - movq %rax,RAX(%rsp) - xorl %edi,%edi - - /* - * It isn't worth to check for reschedule here, - * so internally to the x86_64 port you can rely on kernel_thread() - * not to reschedule the child before returning, this avoids the need - * of hacks for example to fork off the per-CPU idle tasks. - * [Hopefully no generic code relies on the reschedule -AK] - */ - RESTORE_ALL - UNFAKE_STACK_FRAME - ret - CFI_ENDPROC -END(kernel_thread) - -ENTRY(child_rip) +ENTRY(kernel_thread_helper) pushq $0 # fake return address CFI_STARTPROC /* * Here we are in the child and the registers are set as they were * at kernel_thread() invocation in the parent. */ - movq %rdi, %rax - movq %rsi, %rdi - call *%rax + call *%rsi # exit mov %eax, %edi call do_exit ud2 # padding for call trace CFI_ENDPROC -END(child_rip) +END(kernel_thread_helper) /* * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 83019f94b83d..92484c2130c6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -59,8 +59,6 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); static DEFINE_PER_CPU(unsigned char, is_idle); -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; - static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) @@ -231,6 +229,35 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } +/* + * This gets run with %si containing the + * function to call, and %di containing + * the "args". + */ +extern void kernel_thread_helper(void); + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; + + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS; + regs.flags = X86_EFLAGS_IF; + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, ~0UL, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); + void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a1029769b6f2..9fafaf83b3b8 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -17,8 +17,6 @@ EXPORT_SYMBOL(mcount); #endif -EXPORT_SYMBOL(kernel_thread); - EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); EXPORT_SYMBOL(__get_user_4); -- cgit v1.2.3 From fa4b8f84383ae197e643a46c36bf58ab8dffc95c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:41 -0500 Subject: x86, 64-bit: Use user_mode() to determine new stack pointer in copy_thread() Use user_mode() instead of a magic value for sp to determine when returning to kernel mode. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 92484c2130c6..00ac66fa5c6b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -254,7 +254,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.flags = X86_EFLAGS_IF; /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, ~0UL, ®s, 0, NULL, NULL); + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } EXPORT_SYMBOL(kernel_thread); @@ -312,8 +312,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, *childregs = *regs; childregs->ax = 0; - childregs->sp = sp; - if (sp == ~0UL) + if (user_mode(regs)) + childregs->sp = sp; + else childregs->sp = (unsigned long)childregs; p->thread.sp = (unsigned long) childregs; -- cgit v1.2.3 From e840227c141116171c89ab1abb5cc9fee6fdb488 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:42 -0500 Subject: x86, 32-bit: Use same regs as 64-bit for kernel_thread_helper The arg should be in %eax, but that is clobbered by the return value of clone. The function pointer can be in any register. Also, don't push args onto the stack, since regparm(3) is the normal calling convention now. Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 8 ++------ arch/x86/kernel/process_32.c | 8 ++++---- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cb12b9bfc9cc..44a8e0dc6737 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1047,12 +1047,8 @@ END(spurious_interrupt_bug) ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder CFI_STARTPROC - movl %edx,%eax - push %edx - CFI_ADJUST_CFA_OFFSET 4 - call *%ebx - push %eax - CFI_ADJUST_CFA_OFFSET 4 + movl %edi,%eax + call *%esi call do_exit ud2 # padding for call trace CFI_ENDPROC diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 506d5a7ba17c..bd874d2b6ab1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -193,8 +193,8 @@ void show_regs(struct pt_regs *regs) } /* - * This gets run with %bx containing the - * function to call, and %dx containing + * This gets run with %si containing the + * function to call, and %di containing * the "args". */ extern void kernel_thread_helper(void); @@ -208,8 +208,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) memset(®s, 0, sizeof(regs)); - regs.bx = (unsigned long) fn; - regs.dx = (unsigned long) arg; + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; regs.ds = __USER_DS; regs.es = __USER_DS; -- cgit v1.2.3 From f443ff4201dd25cd4dec183f9919ecba90c8edc2 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:43 -0500 Subject: x86: Sync 32/64-bit kernel_thread Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process_32.c | 5 ++++- arch/x86/kernel/process_64.c | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bd874d2b6ab1..f2e8b05a4f02 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -211,14 +211,17 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.si = (unsigned long) fn; regs.di = (unsigned long) arg; +#ifdef CONFIG_X86_32 regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; regs.gs = __KERNEL_STACK_CANARY; +#endif + regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; + regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 00ac66fa5c6b..d49a9094f6f3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -248,10 +248,17 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.si = (unsigned long) fn; regs.di = (unsigned long) arg; +#ifdef CONFIG_X86_32 + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; +#endif + regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS; - regs.flags = X86_EFLAGS_IF; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | 0x2; /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -- cgit v1.2.3 From df59e7bf439918f523ac29e996ec1eebbed60440 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 9 Dec 2009 12:34:44 -0500 Subject: x86: Merge kernel_thread() Signed-off-by: Brian Gerst LKML-Reference: <1260380084-3707-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/process.c | 35 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 36 ------------------------------------ arch/x86/kernel/process_64.c | 36 ------------------------------------ 3 files changed, 35 insertions(+), 72 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f3c1a6b3a65e..8705ccedd447 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -243,6 +243,41 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } +/* + * This gets run with %si containing the + * function to call, and %di containing + * the "args". + */ +extern void kernel_thread_helper(void); + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) +{ + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + + regs.si = (unsigned long) fn; + regs.di = (unsigned long) arg; + +#ifdef CONFIG_X86_32 + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; +#endif + + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); + regs.flags = X86_EFLAGS_IF | 0x2; + + /* Ok, create the new process.. */ + return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); +} +EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f2e8b05a4f02..ccf234266a2e 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -192,42 +192,6 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, ®s->sp, regs->bp); } -/* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - void release_thread(struct task_struct *dead_task) { BUG_ON(dead_task->mm); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d49a9094f6f3..1a362c5bec37 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -229,42 +229,6 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -/* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { -- cgit v1.2.3 From 559df2e0210352f83926d178c40c51142292a18c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 19 Apr 2009 22:35:10 +0200 Subject: kbuild: move asm-offsets.h to include/generated The simplest method was to add an extra asm-offsets.h file in arch/$ARCH/include/asm that references the generated file. We can now migrate the architectures one-by-one to reference the generated file direct - and when done we can delete the temporary arch/$ARCH/include/asm/asm-offsets.h file. Signed-off-by: Sam Ravnborg Cc: Al Viro Signed-off-by: Michal Marek --- .gitignore | 1 - Kbuild | 2 +- Makefile | 1 - arch/alpha/include/asm/asm-offsets.h | 1 + arch/arm/include/asm/asm-offsets.h | 1 + arch/avr32/include/asm/asm-offsets.h | 1 + arch/blackfin/include/asm/asm-offsets.h | 1 + arch/cris/include/asm/asm-offsets.h | 1 + arch/frv/include/asm/asm-offsets.h | 1 + arch/h8300/include/asm/asm-offsets.h | 1 + arch/ia64/include/asm/asm-offsets.h | 1 + arch/m68k/include/asm/asm-offsets.h | 1 + arch/microblaze/include/asm/asm-offsets.h | 1 + arch/mips/include/asm/asm-offsets.h | 1 + arch/mn10300/include/asm/asm-offsets.h | 1 + arch/parisc/include/asm/asm-offsets.h | 1 + arch/powerpc/include/asm/asm-offsets.h | 1 + arch/s390/include/asm/asm-offsets.h | 1 + arch/sh/include/asm/asm-offsets.h | 1 + arch/sparc/include/asm/asm-offsets.h | 1 + arch/um/Makefile | 2 +- arch/um/include/asm/asm-offsets.h | 1 + arch/x86/include/asm/asm-offsets.h | 1 + arch/xtensa/include/asm/asm-offsets.h | 1 + 24 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 arch/alpha/include/asm/asm-offsets.h create mode 100644 arch/arm/include/asm/asm-offsets.h create mode 100644 arch/avr32/include/asm/asm-offsets.h create mode 100644 arch/blackfin/include/asm/asm-offsets.h create mode 100644 arch/cris/include/asm/asm-offsets.h create mode 100644 arch/frv/include/asm/asm-offsets.h create mode 100644 arch/h8300/include/asm/asm-offsets.h create mode 100644 arch/ia64/include/asm/asm-offsets.h create mode 100644 arch/m68k/include/asm/asm-offsets.h create mode 100644 arch/microblaze/include/asm/asm-offsets.h create mode 100644 arch/mips/include/asm/asm-offsets.h create mode 100644 arch/mn10300/include/asm/asm-offsets.h create mode 100644 arch/parisc/include/asm/asm-offsets.h create mode 100644 arch/powerpc/include/asm/asm-offsets.h create mode 100644 arch/s390/include/asm/asm-offsets.h create mode 100644 arch/sh/include/asm/asm-offsets.h create mode 100644 arch/sparc/include/asm/asm-offsets.h create mode 100644 arch/um/include/asm/asm-offsets.h create mode 100644 arch/x86/include/asm/asm-offsets.h create mode 100644 arch/xtensa/include/asm/asm-offsets.h (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index 36d9cd6d4281..3582f422813b 100644 --- a/.gitignore +++ b/.gitignore @@ -46,7 +46,6 @@ Module.symvers # Generated include files # include/asm -include/asm-*/asm-offsets.h include/config include/linux/autoconf.h include/linux/compile.h diff --git a/Kbuild b/Kbuild index 1165d7a5ca4a..e3737ad72b5a 100644 --- a/Kbuild +++ b/Kbuild @@ -43,7 +43,7 @@ $(obj)/$(bounds-file): kernel/bounds.s Kbuild # 2) Generate asm-offsets.h # -offsets-file := include/asm/asm-offsets.h +offsets-file := include/generated/asm-offsets.h always += $(offsets-file) targets += $(offsets-file) diff --git a/Makefile b/Makefile index b58e9312ce30..eb43b9fa30b5 100644 --- a/Makefile +++ b/Makefile @@ -1197,7 +1197,6 @@ MRPROPER_DIRS += include/config include2 usr/include include/generated MRPROPER_FILES += .config .config.old include/asm .version .old_version \ include/linux/autoconf.h include/linux/version.h \ include/linux/utsrelease.h \ - include/asm*/asm-offsets.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/include/asm/asm-offsets.h b/arch/alpha/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/alpha/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/asm-offsets.h b/arch/arm/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/arm/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/avr32/include/asm/asm-offsets.h b/arch/avr32/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/avr32/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/blackfin/include/asm/asm-offsets.h b/arch/blackfin/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/blackfin/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/asm/asm-offsets.h b/arch/cris/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/cris/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/include/asm/asm-offsets.h b/arch/frv/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/frv/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/h8300/include/asm/asm-offsets.h b/arch/h8300/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/h8300/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/ia64/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/m68k/include/asm/asm-offsets.h b/arch/m68k/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/m68k/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/microblaze/include/asm/asm-offsets.h b/arch/microblaze/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/microblaze/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/asm-offsets.h b/arch/mips/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/mips/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/mn10300/include/asm/asm-offsets.h b/arch/mn10300/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/mn10300/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/asm-offsets.h b/arch/parisc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/parisc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/powerpc/include/asm/asm-offsets.h b/arch/powerpc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/powerpc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/s390/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/sh/include/asm/asm-offsets.h b/arch/sh/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/sh/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/sparc/include/asm/asm-offsets.h b/arch/sparc/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/sparc/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/um/Makefile b/arch/um/Makefile index fc633dbacf84..fab8121d2b32 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -149,6 +149,6 @@ $(SHARED_HEADERS)/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s $(SHARED_HEADERS)/kern_constants.h: $(Q)mkdir -p $(dir $@) - $(Q)echo '#include "../../../../include/asm/asm-offsets.h"' >$@ + $(Q)echo '#include "../../../../include/generated/asm-offsets.h"' >$@ export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH diff --git a/arch/um/include/asm/asm-offsets.h b/arch/um/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/um/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/xtensa/include/asm/asm-offsets.h b/arch/xtensa/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/xtensa/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include -- cgit v1.2.3 From 92045954058671fdd0ccf031ca06611ce1d929d1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:36:47 +0200 Subject: kbuild: move compile.h to include/generated Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - arch/x86/boot/version.c | 2 +- init/Makefile | 8 ++------ init/version.c | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index e074c1cc5794..c06b4c9aeb72 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/autoconf.h -include/linux/compile.h include/linux/version.h include/linux/utsrelease.h include/generated diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 2723d9b5ce43..4d88763e39cb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -14,7 +14,7 @@ #include "boot.h" #include -#include +#include const char kernel_version[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " diff --git a/init/Makefile b/init/Makefile index 4a243df426f7..0bf677aa0872 100644 --- a/init/Makefile +++ b/init/Makefile @@ -15,12 +15,8 @@ mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o -# files to be removed upon make clean -clean-files := ../include/linux/compile.h - # dependencies on generated files need to be listed explicitly - -$(obj)/version.o: include/linux/compile.h +$(obj)/version.o: include/generated/compile.h # compile.h changes depending on hostname, generation number, etc, # so we regenerate it always. @@ -30,7 +26,7 @@ $(obj)/version.o: include/linux/compile.h chk_compile.h = : quiet_chk_compile.h = echo ' CHK $@' silent_chk_compile.h = : -include/linux/compile.h: FORCE +include/generated/compile.h: FORCE @$($(quiet)chk_compile.h) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" diff --git a/init/version.c b/init/version.c index 52a8b98642b8..82328aaca1ef 100644 --- a/init/version.c +++ b/init/version.c @@ -6,7 +6,7 @@ * May be freely distributed as part of Linux. */ -#include +#include #include #include #include -- cgit v1.2.3 From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 18 Oct 2009 00:52:28 +0200 Subject: kbuild: move utsrelease.h to include/generated Fix up all users of utsrelease.h Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek --- .gitignore | 1 - Makefile | 5 ++--- arch/alpha/boot/bootp.c | 2 +- arch/alpha/boot/bootpz.c | 2 +- arch/alpha/boot/main.c | 2 +- arch/frv/kernel/setup.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 2 +- arch/powerpc/platforms/amigaone/setup.c | 2 +- arch/powerpc/platforms/chrp/setup.c | 2 +- arch/powerpc/platforms/powermac/bootx_init.c | 2 +- arch/x86/boot/header.S | 2 +- arch/x86/boot/version.c | 2 +- drivers/staging/panel/panel.c | 2 +- include/linux/vermagic.h | 2 +- init/version.c | 2 +- kernel/kexec.c | 2 +- kernel/trace/trace.c | 2 +- 17 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/.gitignore b/.gitignore index c6c19ea6ea96..002d5304968b 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,6 @@ Module.symvers # include/config include/linux/version.h -include/linux/utsrelease.h include/generated # stgit generated dirs diff --git a/Makefile b/Makefile index 3bdd932e3d88..860224d7cbcf 100644 --- a/Makefile +++ b/Makefile @@ -968,7 +968,7 @@ endif # prepare2 creates a makefile if using a separate output directory prepare2: prepare3 outputmakefile -prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ +prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) @@ -1005,7 +1005,7 @@ endef include/linux/version.h: $(srctree)/Makefile FORCE $(call filechk,version.h) -include/linux/utsrelease.h: include/config/kernel.release FORCE +include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) PHONY += headerdep @@ -1151,7 +1151,6 @@ CLEAN_FILES += vmlinux System.map \ MRPROPER_DIRS += include/config usr/include include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ - include/linux/utsrelease.h \ Module.symvers Module.markers tags TAGS cscope* # clean - Delete most, but leave enough to build external modules diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 3af21c789339..3c8d1b25c661 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 1036b515e20c..ade3f129dc27 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -11,7 +11,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 89f3be071ae5..644b7db55438 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index 55e4fab7c0bc..75cf7f4b2fa8 100644 --- a/arch/frv/kernel/setup.c +++ b/arch/frv/kernel/setup.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index bcc69e1f77c1..45c0cb9b67e6 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 9290a7a442d0..fb4eb0df054c 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index cd4ad9aea760..0a6f5ab8aab3 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index cf660916ae0b..9dd789a7370d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54b4641..93e689f4bd86 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index 4d88763e39cb..2b15aa488ffb 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -13,7 +13,7 @@ */ #include "boot.h" -#include +#include #include const char kernel_version[] = diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 4ce399b6d237..f98a52448eae 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 79b9837d9ca0..cf97b5b9d1fe 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,4 +1,4 @@ -#include +#include #include /* Simply sanity version stamp for modules. */ diff --git a/init/version.c b/init/version.c index 82328aaca1ef..adff586401a5 100644 --- a/init/version.c +++ b/init/version.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef CONFIG_KALLSYMS diff --git a/kernel/kexec.c b/kernel/kexec.c index f336e2107f98..83f54e2a6eed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88bd9ae2a9ed..bfb1b64bfa9d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -12,7 +12,7 @@ * Copyright (C) 2004 William Lee Irwin III */ #include -#include +#include #include #include #include -- cgit v1.2.3 From 494c2ebfb287eb10b229415063099e3700639028 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 14 Dec 2009 10:02:18 -0800 Subject: x86, msr: Remove incorrect, duplicated code in the MSR driver The MSR driver would compute the values for cpu and c at declaration, and then again in the body of the function. This isn't merely redundant, but unsafe, since cpu might not refer to a valid CPU at that point. Remove the unnecessary and dangerous references in the declarations. This code now matches the equivalent code in the CPUID driver. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/msr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 553449951b84..572b07eee3f4 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -172,11 +172,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) static int msr_open(struct inode *inode, struct file *file) { - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned int cpu; + struct cpuinfo_x86 *c; cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ -- cgit v1.2.3 From 873b5271f878a11729fb4602c6ce967d0ff81119 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 14 Dec 2009 13:55:20 -0800 Subject: x86: Regex support and known-movable symbols for relocs, fix _end This adds a new category of symbols to the relocs program: symbols which are known to be relative, even though the linker emits them as absolute; this is the case for symbols that live in the linker script, which currently applies to _end. Unfortunately the previous workaround of putting _end in its own empty section was defeated by newer binutils, which remove empty sections completely. This patch also changes the symbol matching to use regular expressions instead of hardcoded C for specific patterns. This is a decidedly non-minimal patch: a modified version of the relocs program is used as part of the Syslinux build, and this is basically a backport to Linux of some of those changes; they have thus been well tested. Signed-off-by: H. Peter Anvin LKML-Reference: <4AF86211.3070103@zytor.com> Acked-by: Michal Marek Tested-by: Sedat Dilek --- arch/x86/boot/compressed/relocs.c | 87 ++++++++++++++++++++++++++------------- arch/x86/kernel/vmlinux.lds.S | 4 +- 2 files changed, 60 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index bbeb0c3fbd90..89bbf4e4d05d 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -9,6 +9,9 @@ #include #define USE_BSD #include +#include + +static void die(char *fmt, ...); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; @@ -30,25 +33,47 @@ static struct section *secs; * the address for which it has been compiled. Don't warn user about * absolute relocations present w.r.t these symbols. */ -static const char* safe_abs_relocs[] = { - "xen_irq_disable_direct_reloc", - "xen_save_fl_direct_reloc", -}; +static const char abs_sym_regex[] = + "^(xen_irq_disable_direct_reloc$|" + "xen_save_fl_direct_reloc$|" + "VDSO|" + "__crc_)"; +static regex_t abs_sym_regex_c; +static int is_abs_reloc(const char *sym_name) +{ + return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); +} -static int is_safe_abs_reloc(const char* sym_name) +/* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ +static const char rel_sym_regex[] = + "^_end$"; +static regex_t rel_sym_regex_c; +static int is_rel_reloc(const char *sym_name) { - int i; + return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); +} - for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { - if (!strcmp(sym_name, safe_abs_relocs[i])) - /* Match found */ - return 1; - } - if (strncmp(sym_name, "VDSO", 4) == 0) - return 1; - if (strncmp(sym_name, "__crc_", 6) == 0) - return 1; - return 0; +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&abs_sym_regex_c, abs_sym_regex, + REG_EXTENDED|REG_NOSUB); + if (err) { + regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); + die("%s", errbuf); + } + + err = regcomp(&rel_sym_regex_c, rel_sym_regex, + REG_EXTENDED|REG_NOSUB); + if (err) { + regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); + die("%s", errbuf); + } } static void die(char *fmt, ...) @@ -131,7 +156,7 @@ static const char *rel_type(unsigned type) #undef REL_TYPE }; const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name)) { + if (type < ARRAY_SIZE(type_name) && type_name[type]) { name = type_name[type]; } return name; @@ -448,7 +473,7 @@ static void print_absolute_relocs(void) * Before warning check if this absolute symbol * relocation is harmless. */ - if (is_safe_abs_reloc(name)) + if (is_abs_reloc(name) || is_rel_reloc(name)) continue; if (!printed) { @@ -501,21 +526,26 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS) { + if (sym->st_shndx == SHN_ABS && + !is_rel_reloc(sym_name(sym_strtab, sym))) { continue; } - if (r_type == R_386_NONE || r_type == R_386_PC32) { + switch (r_type) { + case R_386_NONE: + case R_386_PC32: /* * NONE can be ignored and and PC relative * relocations don't need to be adjusted. */ - } - else if (r_type == R_386_32) { + break; + case R_386_32: /* Visit relocations that need to be adjusted */ visit(rel, sym); - } - else { - die("Unsupported relocation type: %d\n", r_type); + break; + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; } } } @@ -571,16 +601,15 @@ static void emit_relocs(int as_text) } else { unsigned char buf[4]; - buf[0] = buf[1] = buf[2] = buf[3] = 0; /* Print a stop */ - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + fwrite("\0\0\0\0", 4, 1, stdout); /* Now print each relocation */ for (i = 0; i < reloc_count; i++) { buf[0] = (relocs[i] >> 0) & 0xff; buf[1] = (relocs[i] >> 8) & 0xff; buf[2] = (relocs[i] >> 16) & 0xff; buf[3] = (relocs[i] >> 24) & 0xff; - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + fwrite(buf, 4, 1, stdout); } } } @@ -598,6 +627,8 @@ int main(int argc, char **argv) FILE *fp; int i; + regex_init(); + show_absolute_syms = 0; show_absolute_relocs = 0; as_text = 0; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index f3f2104408d9..f92a0da608cb 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -319,9 +319,7 @@ SECTIONS __brk_limit = .; } - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = .; - } + _end = .; STABS_DEBUG DWARF_DEBUG -- cgit v1.2.3 From 03a05ed1152944000151d57b71000de287a1eb02 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Fri, 11 Dec 2009 15:17:20 +0800 Subject: ACPI: Use the ARB_DISABLE for the CPU which model id is less than 0x0f. Currently, ARB_DISABLE is a NOP on all of the recent Intel platforms. For such platforms, reduce contention on c3_lock by skipping the fake ARB_DISABLE. The cpu model id on one laptop is 14. If we disable ARB_DISABLE on this box, the box can't be booted correctly. But if we still enable ARB_DISABLE on this box, the box can be booted correctly. So we still use the ARB_DISABLE for the cpu which mode id is less than 0x0f. http://bugzilla.kernel.org/show_bug.cgi?id=14700 Signed-off-by: Zhao Yakui Acked-by: Pallipadi, Venkatesh cc: stable@kernel.org Signed-off-by: Len Brown --- arch/x86/kernel/acpi/cstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 59cdfa4686b2..2e837f5080fe 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, * P4, Core and beyond CPUs */ if (c->x86_vendor == X86_VENDOR_INTEL && - (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) + (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) flags->bm_control = 0; } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); -- cgit v1.2.3 From e6428047725d72d63c1d9c4ba852e635e3ffe52a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 15 Dec 2009 16:28:13 -0600 Subject: x86: don't export inline function For CONFIG_PARAVIRT, load_gs_index is an inline function (it's #defined to native_load_gs_index otherwise). Exporting an inline function breaks the new assembler-based alphabetical sorted symbol list: Today's linux-next build (x86_64 allmodconfig) failed like this: .tmp_exports-asm.o: In function `__ksymtab_load_gs_index': (__ksymtab_sorted+0x5b40): undefined reference to `load_gs_index' Signed-off-by: Rusty Russell To: x86@kernel.org Cc: alan-jenkins@tuffmail.co.uk --- arch/x86/kernel/x8664_ksyms_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a1029769b6f2..084c1adc45f5 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -56,4 +56,6 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); -EXPORT_SYMBOL(load_gs_index); +#ifndef CONFIG_PARAVIRT +EXPORT_SYMBOL(native_load_gs_index); +#endif -- cgit v1.2.3 From 186a25026c44d1bfa97671110ff14dcd0c99678e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Dec 2009 20:47:56 +0900 Subject: x86: Split swiotlb initialization into two stages The commit f4780ca005404166cc40af77ef0e86132ab98a81 moves swiotlb initialization before dma32_free_bootmem(). It's supposed to fix a bug that the commit 75f1cdf1dda92cae037ec848ae63690d91913eac introduced, we initialize SWIOTLB right after dma32_free_bootmem so we wrongly steal memory area allocated for GART with broken BIOS earlier. However, the above commit introduced another problem, which likely breaks machines with huge amount of memory. Such a box use the majority of DMA32_ZONE so there is no memory for swiotlb. With this patch, the x86 IOMMU initialization sequence are: 1. We set swiotlb to 1 in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). If swiotlb usage is forced by the boot option, we go to the step 3 and finish (we don't try to detect IOMMUs). 2. We call the detection functions of all the IOMMUs. The detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 3. We initialize swiotlb (and set dma_ops to swiotlb_dma_ops) if swiotlb is set to 1. 4. If the IOMMU initialization function doesn't need swiotlb (e.g. the initialization is sucessful) then sets swiotlb to zero. 5. If we find that swiotlb is set to zero, we free swiotlb resource. Reported-by: Yinghai Lu Reported-by: Roland Dreier Signed-off-by: FUJITA Tomonori LKML-Reference: <20091215204729A.fujita.tomonori@lab.ntt.co.jp> Tested-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/swiotlb.h | 8 ++++++-- arch/x86/kernel/pci-dma.c | 9 ++++----- arch/x86/kernel/pci-swiotlb.c | 11 +++++++---- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 87ffcb12a1b8..8085277e1b8b 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -5,13 +5,17 @@ #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern int pci_swiotlb_init(void); +extern int __init pci_swiotlb_detect(void); +extern void __init pci_swiotlb_init(void); #else #define swiotlb 0 -static inline int pci_swiotlb_init(void) +static inline int pci_swiotlb_detect(void) { return 0; } +static inline void pci_swiotlb_init(void) +{ +} #endif static inline void dma_mark_clean(void *addr, size_t size) {} diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index fcc2f2bfa39c..75e14e21f61a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -120,15 +120,12 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { - int use_swiotlb; - - use_swiotlb = pci_swiotlb_init(); #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif - if (use_swiotlb) - return; + if (pci_swiotlb_detect()) + goto out; gart_iommu_hole_init(); @@ -138,6 +135,8 @@ void __init pci_iommu_alloc(void) /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); +out: + pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index e3c0a66b9e77..7d2829dde20e 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_ops = { }; /* - * pci_swiotlb_init - initialize swiotlb if necessary + * pci_swiotlb_detect - set swiotlb to 1 if necessary * * This returns non-zero if we are forced to use swiotlb (by the boot * option). */ -int __init pci_swiotlb_init(void) +int __init pci_swiotlb_detect(void) { int use_swiotlb = swiotlb | swiotlb_force; @@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void) if (swiotlb_force) swiotlb = 1; + return use_swiotlb; +} + +void __init pci_swiotlb_init(void) +{ if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } - - return use_swiotlb; } -- cgit v1.2.3 From 23637568ad0c9b5ab0ad27d2f2f26d1e9282c527 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 13 Dec 2009 16:04:38 -0600 Subject: x86: Fix kprobes build with non-gawk awk The instruction attribute table generator fails when run by mawk or original-awk: $ mawk -f arch/x86/tools/gen-insn-attr-x86.awk \ arch/x86/lib/x86-opcode-map.txt > /dev/null Semantic error at 240: Second IMM error $ echo $? 1 Line 240 contains "c8: ENTER Iw,Ib", which indicates that this instruction has two immediate operands, the second of which is one byte. The script loops through the immediate operands using a for loop. Unfortunately, there is no guarantee in awk that a for (variable in array) loop will return the indices in increasing order. Internally, both original-awk and mawk iterate over a hash table for this purpose, and both implementations happen to produce the index 2 before 1. The supposed second immediate operand is more than one byte wide, producing the error. So loop over the indices in increasing order instead. As a side-effect, with mawk this means the silly two-entry hash table never has to be built. Signed-off-by: Jonathan Nieder Acked-by Masami Hiramatsu Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091213220437.GA27718@progeny.tock> Signed-off-by: Ingo Molnar --- arch/x86/tools/gen-insn-attr-x86.awk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index e34e92a28eb6..7a6850683c34 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -226,12 +226,12 @@ function add_flags(old,new) { } # convert operands to flags. -function convert_operands(opnd, i,imm,mod) +function convert_operands(count,opnd, i,j,imm,mod) { imm = null mod = null - for (i in opnd) { - i = opnd[i] + for (j = 1; j <= count; j++) { + i = opnd[j] if (match(i, imm_expr) == 1) { if (!imm_flag[i]) semantic_error("Unknown imm opnd: " i) @@ -282,8 +282,8 @@ function convert_operands(opnd, i,imm,mod) # parse one opcode if (match($i, opnd_expr)) { opnd = $i - split($(i++), opnds, ",") - flags = convert_operands(opnds) + count = split($(i++), opnds, ",") + flags = convert_operands(count, opnds) } if (match($i, ext_expr)) ext = $(i++) -- cgit v1.2.3 From 0b962d473af32ec334df271b54ff4973cb2b4c73 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 15 Dec 2009 15:13:07 -0800 Subject: x86, msr/cpuid: Register enough minors for the MSR and CPUID drivers register_chrdev() hardcodes registering 256 minors, presumably to avoid breaking old drivers. However, we need to register enough minors so that we have all possible CPUs. checkpatch warns on this patch, but the patch is correct: NR_CPUS here is a static *upper bound* on the *maximum CPU index* (not *number of CPUs!*) and that is what we want. Reported-and-tested-by: Russ Anderson Cc: Tejun Heo Cc: Alan Cox Cc: Takashi Iwai Cc: Alexander Viro Signed-off-by: H. Peter Anvin LKML-Reference: --- arch/x86/kernel/cpuid.c | 5 +++-- arch/x86/kernel/msr.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 7ef24a796992..cb27fd6136c9 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -187,7 +187,8 @@ static int __init cpuid_init(void) int i, err = 0; i = 0; - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { + if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, + "cpu/cpuid", &cpuid_fops)) { printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", CPUID_MAJOR); err = -EBUSY; @@ -216,7 +217,7 @@ out_class: } class_destroy(cpuid_class); out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); out: return err; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 572b07eee3f4..4bd93c9b2b27 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -246,7 +246,7 @@ static int __init msr_init(void) int i, err = 0; i = 0; - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { + if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { printk(KERN_ERR "msr: unable to get major %d for msr\n", MSR_MAJOR); err = -EBUSY; @@ -274,7 +274,7 @@ out_class: msr_device_destroy(i); class_destroy(msr_class); out_chrdev: - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); out: return err; } -- cgit v1.2.3 From 5df974009fe513c664303de24725ea0f8b47f12e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 16 Dec 2009 13:48:04 +0800 Subject: x86: Add IA32_TSC_AUX MSR and use it Clean up write_tsc() and write_tscp_aux() by replacing hardcoded values. No change in functionality. Signed-off-by: Sheng Yang Cc: Avi Kivity Cc: Marcelo Tosatti LKML-Reference: <1260942485-19156-4-git-send-email-sheng@linux.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/msr.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4ffe09b2ad75..ac98d2914ebf 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -12,6 +12,7 @@ #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ /* EFER bits: */ #define _EFER_SCE 0 /* SYSCALL/SYSRET */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 2d228fc9b4b7..cf985aa00660 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -240,9 +240,9 @@ do { \ #define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \ (u32)((val) >> 32)) -#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2)) +#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2)) -#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) +#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0) struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); -- cgit v1.2.3 From 853b3da10d617f08340e5fe569c99e7b54f2a568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Dec 2009 00:34:13 -0500 Subject: sanitize do_pipe_flags() callers in arch * hpux_pipe() - no need to take BKL * sys32_pipe() in arch/x86/ia32 and xtensa_pipe() in arch/xtensa - no need at all, since both functions are open-coded sys_pipe() Signed-off-by: Al Viro --- arch/parisc/hpux/sys_hpux.c | 7 +------ arch/x86/include/asm/sys_ia32.h | 1 - arch/xtensa/include/asm/syscall.h | 1 - arch/xtensa/include/asm/unistd.h | 2 +- arch/xtensa/kernel/syscall.c | 18 ------------------ 5 files changed, 2 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index 18072e03a019..92343bd35fa3 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c @@ -445,12 +445,7 @@ done: int hpux_pipe(int *kstack_fildes) { - int error; - - lock_kernel(); - error = do_pipe_flags(kstack_fildes, 0); - unlock_kernel(); - return error; + return do_pipe_flags(kstack_fildes, 0); } /* lies - says it works, but it really didn't lock anything */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 4a5a089e1c62..d5f69045c100 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -30,7 +30,6 @@ struct mmap_arg_struct; asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); -asmlinkage long sys32_pipe(int __user *); struct sigaction32; struct old_sigaction32; asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 4352dbe1186a..efcf33b92e4c 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -12,7 +12,6 @@ struct pt_regs; struct sigaction; asmlinkage long xtensa_execve(char*, char**, char**, struct pt_regs*); asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*); -asmlinkage long xtensa_pipe(int __user *); asmlinkage long xtensa_ptrace(long, long, long, long); asmlinkage long xtensa_sigreturn(struct pt_regs*); asmlinkage long xtensa_rt_sigreturn(struct pt_regs*); diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index fbf318b3af3e..528042c2951e 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -94,7 +94,7 @@ __SYSCALL( 35, sys_readlink, 3) #define __NR_mknod 36 __SYSCALL( 36, sys_mknod, 3) #define __NR_pipe 37 -__SYSCALL( 37, xtensa_pipe, 1) +__SYSCALL( 37, sys_pipe, 1) #define __NR_unlink 38 __SYSCALL( 38, sys_unlink, 1) #define __NR_rmdir 39 diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 1e67bab775c1..816e6d0d686c 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -39,24 +39,6 @@ syscall_t sys_call_table[__NR_syscall_count] /* FIXME __cacheline_aligned */= { #include }; -/* - * xtensa_pipe() is the normal C calling standard for creating a pipe. It's not - * the way unix traditional does this, though. - */ - -asmlinkage long xtensa_pipe(int __user *userfds) -{ - int fd[2]; - int error; - - error = do_pipe_flags(fd, 0); - if (!error) { - if (copy_to_user(userfds, fd, 2 * sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg) { unsigned long ret; -- cgit v1.2.3 From 5714868812b563ba8816c1d974f4f07c76941c30 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 15 Dec 2009 10:19:50 +0900 Subject: PCI: fix section mismatch on update_res() Remark update_res from __init to __devinit as it is called also from __devinit functions. This patch removes the following warning message: WARNING: vmlinux.o(.devinit.text+0x774a): Section mismatch in reference from the function pci_root_bus_res() to the function .init.text:update_res() The function __devinit pci_root_bus_res() references a function __init update_res(). If update_res is only used by pci_root_bus_res then annotate update_res with a matching annotation. Signed-off-by: Jiri Slaby Cc: Aristeu Sergio Cc: Jesse Barnes Cc: linux-pci@vger.kernel.org Cc: x86@kernel.org Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- arch/x86/pci/bus_numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 145df00e0387..f939d603adfa 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -51,7 +51,7 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) } } -void __init update_res(struct pci_root_info *info, size_t start, +void __devinit update_res(struct pci_root_info *info, size_t start, size_t end, unsigned long flags, int merge) { int i; -- cgit v1.2.3 From 9d260ebc09a0ad6b5c73e17676df42c7bc75ff64 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 16 Dec 2009 15:43:55 +0100 Subject: x86, amd: Get multi-node CPU info from NodeId MSR instead of PCI config space Use NodeId MSR to get NodeId and number of nodes per processor. Signed-off-by: Andreas Herrmann LKML-Reference: <20091216144355.GB28798@alberich.amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 53 +++++++++++---------------------------- 3 files changed, 17 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 613700f27a4a..637e1ec963c3 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -153,6 +153,7 @@ #define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ /* * Auxiliary flags: Linux defined - For features scattered in various diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ac98d2914ebf..1cd58cdbc03f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -124,6 +124,7 @@ #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff #define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8dc3ea145c97..e485825130d2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid) /* * Fixup core topology information for AMD multi-node processors. - * Assumption 1: Number of cores in each internal node is the same. - * Assumption 2: Mixed systems with both single-node and dual-node - * processors are not supported. + * Assumption: Number of cores in each internal node is the same. */ #ifdef CONFIG_X86_HT static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) { -#ifdef CONFIG_PCI - u32 t, cpn; - u8 n, n_id; + unsigned long long value; + u32 nodes, cores_per_node; int cpu = smp_processor_id(); + if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) + return; + /* fixup topology information only once for a core */ if (cpu_has(c, X86_FEATURE_AMD_DCM)) return; - /* check for multi-node processor on boot cpu */ - t = read_pci_config(0, 24, 3, 0xe8); - if (!(t & (1 << 29))) + rdmsrl(MSR_FAM10H_NODE_ID, value); + + nodes = ((value >> 3) & 7) + 1; + if (nodes == 1) return; set_cpu_cap(c, X86_FEATURE_AMD_DCM); + cores_per_node = c->x86_max_cores / nodes; - /* cores per node: each internal node has half the number of cores */ - cpn = c->x86_max_cores >> 1; + /* store NodeID, use llc_shared_map to store sibling info */ + per_cpu(cpu_llc_id, cpu) = value & 7; - /* even-numbered NB_id of this dual-node processor */ - n = c->phys_proc_id << 1; - - /* - * determine internal node id and assign cores fifty-fifty to - * each node of the dual-node processor - */ - t = read_pci_config(0, 24 + n, 3, 0xe8); - n = (t>>30) & 0x3; - if (n == 0) { - if (c->cpu_core_id < cpn) - n_id = 0; - else - n_id = 1; - } else { - if (c->cpu_core_id < cpn) - n_id = 1; - else - n_id = 0; - } - - /* compute entire NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; - - /* fixup core id to be in range from 0 to cpn */ - c->cpu_core_id = c->cpu_core_id % cpn; -#endif + /* fixup core id to be in range from 0 to (cores_per_node - 1) */ + c->cpu_core_id = c->cpu_core_id % cores_per_node; } #endif -- cgit v1.2.3 From 6ede31e03084ee084bcee073ef3d1136f68d0906 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 Dec 2009 00:16:25 +0100 Subject: x86, msr: msrs_alloc/free for CONFIG_SMP=n Randy Dunlap reported the following build error: "When CONFIG_SMP=n, CONFIG_X86_MSR=m: ERROR: "msrs_free" [drivers/edac/amd64_edac_mod.ko] undefined! ERROR: "msrs_alloc" [drivers/edac/amd64_edac_mod.ko] undefined!" This is due to the fact that is conditioned on CONFIG_SMP and in the UP case we have only the stubs in the header. Fork off SMP functionality into a new file (msr-smp.c) and build msrs_{alloc,free} unconditionally. Reported-by: Randy Dunlap Cc: H. Peter Anvin Signed-off-by: Borislav Petkov LKML-Reference: <20091216231625.GD27228@liondog.tnic> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/msr.h | 12 +++ arch/x86/lib/Makefile | 4 +- arch/x86/lib/msr-smp.c | 204 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/msr.c | 213 --------------------------------------------- 4 files changed, 218 insertions(+), 215 deletions(-) create mode 100644 arch/x86/lib/msr-smp.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index cf985aa00660..c5bc4c2d33f5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -27,6 +27,18 @@ struct msr { }; }; +struct msr_info { + u32 msr_no; + struct msr reg; + struct msr *msrs; + int err; +}; + +struct msr_regs_info { + u32 *regs; + int err; +}; + static inline unsigned long long native_read_tscp(unsigned int *aux) { unsigned long low, high; diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472895fb..706be8bf967b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) := msr.o +obj-$(CONFIG_SMP) += msr-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o @@ -22,7 +22,7 @@ lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-y += insn.o inat.o -obj-y += msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c new file mode 100644 index 000000000000..a6b1b86d2253 --- /dev/null +++ b/arch/x86/lib/msr-smp.c @@ -0,0 +1,204 @@ +#include +#include +#include +#include + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + rdmsr(rv->msr_no, reg->l, reg->h); +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + struct msr *reg; + int this_cpu = raw_smp_processor_id(); + + if (rv->msrs) + reg = per_cpu_ptr(rv->msrs, this_cpu); + else + reg = &rv->reg; + + wrmsr(rv->msr_no, reg->l, reg->h); +} + +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err; +} +EXPORT_SYMBOL(rdmsr_on_cpu); + +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} +EXPORT_SYMBOL(wrmsr_on_cpu); + +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) +{ + struct msr_info rv; + int this_cpu; + + memset(&rv, 0, sizeof(rv)); + + rv.msrs = msrs; + rv.msr_no = msr_no; + + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + msr_func(&rv); + + smp_call_function_many(mask, msr_func, &rv, 1); + put_cpu(); +} + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} +EXPORT_SYMBOL(rdmsr_on_cpus); + +/* + * wrmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); +} +EXPORT_SYMBOL(wrmsr_on_cpus); + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *l = rv.reg.l; + *h = rv.reg.h; + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_on_cpu); + +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + memset(&rv, 0, sizeof(rv)); + + rv.msr_no = msr_no; + rv.reg.l = l; + rv.reg.h = h; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_on_cpu); + +/* + * These variants are significantly slower, but allows control over + * the entire 32-bit GPR set. + */ +static void __rdmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = rdmsr_safe_regs(rv->regs); +} + +static void __wrmsr_safe_regs_on_cpu(void *info) +{ + struct msr_regs_info *rv = info; + + rv->err = wrmsr_safe_regs(rv->regs); +} + +int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); + +int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) +{ + int err; + struct msr_regs_info rv; + + rv.regs = regs; + rv.err = -EIO; + err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); + + return err ? err : rv.err; +} +EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 872834177937..8f8eebdca7d4 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -1,123 +1,7 @@ #include #include -#include #include -struct msr_info { - u32 msr_no; - struct msr reg; - struct msr *msrs; - int err; -}; - -static void __rdmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - rdmsr(rv->msr_no, reg->l, reg->h); -} - -static void __wrmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - struct msr *reg; - int this_cpu = raw_smp_processor_id(); - - if (rv->msrs) - reg = per_cpu_ptr(rv->msrs, this_cpu); - else - reg = &rv->reg; - - wrmsr(rv->msr_no, reg->l, reg->h); -} - -int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err; -} -EXPORT_SYMBOL(rdmsr_on_cpu); - -int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); - - return err; -} -EXPORT_SYMBOL(wrmsr_on_cpu); - -static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, - struct msr *msrs, - void (*msr_func) (void *info)) -{ - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - msr_func(&rv); - - smp_call_function_many(mask, msr_func, &rv, 1); - put_cpu(); -} - -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); -} -EXPORT_SYMBOL(rdmsr_on_cpus); - -/* - * wrmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) -{ - __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); -} -EXPORT_SYMBOL(wrmsr_on_cpus); - struct msr *msrs_alloc(void) { struct msr *msrs = NULL; @@ -137,100 +21,3 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ -static void __rdmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); -} - -static void __wrmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); -} - -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_on_cpu); - -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - int err; - struct msr_info rv; - - memset(&rv, 0, sizeof(rv)); - - rv.msr_no = msr_no; - rv.reg.l = l; - rv.reg.h = h; - err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_on_cpu); - -/* - * These variants are significantly slower, but allows control over - * the entire 32-bit GPR set. - */ -struct msr_regs_info { - u32 *regs; - int err; -}; - -static void __rdmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = rdmsr_safe_regs(rv->regs); -} - -static void __wrmsr_safe_regs_on_cpu(void *info) -{ - struct msr_regs_info *rv = info; - - rv->err = wrmsr_safe_regs(rv->regs); -} - -int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu); - -int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs) -{ - int err; - struct msr_regs_info rv; - - rv.regs = regs; - rv.err = -EIO; - err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1); - - return err ? err : rv.err; -} -EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu); -- cgit v1.2.3 From 45a94d7cd45ed991914011919e7d40eb6d2546d1 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 16 Dec 2009 16:25:42 -0800 Subject: x86, cpuid: Add "volatile" to asm in native_cpuid() xsave_cntxt_init() does something like: cpuid(0xd, ..); // find out what features FP/SSE/.. etc are supported xsetbv(); // enable the features known to OS cpuid(0xd, ..); // find out the size of the context for features enabled Depending on what features get enabled in xsetbv(), value of the cpuid.eax=0xd.ecx=0.ebx changes correspondingly (representing the size of the context that is enabled). As we don't have volatile keyword for native_cpuid(), gcc 4.1.2 optimizes away the second cpuid and the kernel continues to use the cpuid information obtained before xsetbv(), ultimately leading to kernel crash on processors supporting more state than the legacy FP/SSE. Add "volatile" for native_cpuid(). Signed-off-by: Suresh Siddha LKML-Reference: <1261009542.2745.55.camel@sbs-t61.sc.intel.com> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6f8ec1c37e0a..fc801bab1b3b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -181,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ - asm("cpuid" + asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), -- cgit v1.2.3 From 329962503692b42d8088f31584e42d52db179d52 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Dec 2009 17:59:02 -0800 Subject: x86: Fix checking of SRAT when node 0 ram is not from 0 Found one system that boot from socket1 instead of socket0, SRAT get rejected... [ 0.000000] SRAT: Node 1 PXM 0 0-a0000 [ 0.000000] SRAT: Node 1 PXM 0 100000-80000000 [ 0.000000] SRAT: Node 1 PXM 0 100000000-2080000000 [ 0.000000] SRAT: Node 0 PXM 1 2080000000-4080000000 [ 0.000000] SRAT: Node 2 PXM 2 4080000000-6080000000 [ 0.000000] SRAT: Node 3 PXM 3 6080000000-8080000000 [ 0.000000] SRAT: Node 4 PXM 4 8080000000-a080000000 [ 0.000000] SRAT: Node 5 PXM 5 a080000000-c080000000 [ 0.000000] SRAT: Node 6 PXM 6 c080000000-e080000000 [ 0.000000] SRAT: Node 7 PXM 7 e080000000-10080000000 ... [ 0.000000] NUMA: Allocated memnodemap from 500000 - 701040 [ 0.000000] NUMA: Using 20 for the hash shift. [ 0.000000] Adding active range (0, 0x2080000, 0x4080000) 0 entries of 3200 used [ 0.000000] Adding active range (1, 0x0, 0x96) 1 entries of 3200 used [ 0.000000] Adding active range (1, 0x100, 0x7f750) 2 entries of 3200 used [ 0.000000] Adding active range (1, 0x100000, 0x2080000) 3 entries of 3200 used [ 0.000000] Adding active range (2, 0x4080000, 0x6080000) 4 entries of 3200 used [ 0.000000] Adding active range (3, 0x6080000, 0x8080000) 5 entries of 3200 used [ 0.000000] Adding active range (4, 0x8080000, 0xa080000) 6 entries of 3200 used [ 0.000000] Adding active range (5, 0xa080000, 0xc080000) 7 entries of 3200 used [ 0.000000] Adding active range (6, 0xc080000, 0xe080000) 8 entries of 3200 used [ 0.000000] Adding active range (7, 0xe080000, 0x10080000) 9 entries of 3200 used [ 0.000000] SRAT: PXMs only cover 917504MB of your 1048566MB e820 RAM. Not used. [ 0.000000] SRAT: SRAT not used. the early_node_map is not sorted because node0 with non zero start come first. so try to sort it right away after all regions are registered. also fixs refression by 8716273c (x86: Export srat physical topology) -v2: make it more solid to handle cross node case like node0 [0,4g), [8,12g) and node1 [4g, 8g), [12g, 16g) -v3: update comments. Reported-and-tested-by: Jens Axboe Signed-off-by: Yinghai Lu LKML-Reference: <4B2579D2.3010201@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_32.c | 2 ++ arch/x86/mm/srat_64.c | 4 +++- include/linux/mm.h | 3 +++ mm/page_alloc.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 6f8aa33031c7..9324f13492d5 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void) e820_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } + /* for out of order entries in SRAT */ + sort_node_map(); for_each_online_node(nid) { unsigned long start = node_start_pfn[nid]; diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index d89075489664..a27124185fc1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) unsigned long s = nodes[i].start >> PAGE_SHIFT; unsigned long e = nodes[i].end >> PAGE_SHIFT; pxmram += e - s; - pxmram -= absent_pages_in_range(s, e); + pxmram -= __absent_pages_in_range(i, s, e); if ((long)pxmram < 0) pxmram = 0; } @@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) for_each_node_mask(i, nodes_parsed) e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, nodes[i].end >> PAGE_SHIFT); + /* for out of order entries in SRAT */ + sort_node_map(); if (!nodes_cover_memory(nodes)) { bad_srat(); return -1; diff --git a/include/linux/mm.h b/include/linux/mm.h index 24c395694f4d..20a2036f3a94 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1022,6 +1022,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); +void sort_node_map(void); +unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, + unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac63f41e..873c86308b4e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3573,7 +3573,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, * then all holes in the requested range will be accounted for. */ -static unsigned long __meminit __absent_pages_in_range(int nid, +unsigned long __meminit __absent_pages_in_range(int nid, unsigned long range_start_pfn, unsigned long range_end_pfn) { @@ -4102,7 +4102,7 @@ static int __init cmp_node_active_region(const void *a, const void *b) } /* sort the node_map by start_pfn */ -static void __init sort_node_map(void) +void __init sort_node_map(void) { sort(early_node_map, (size_t)nr_nodemap_entries, sizeof(struct node_active_region), -- cgit v1.2.3 From 6a1e008a0915f502eb026fb995ea3e49d5b017f7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Dec 2009 17:59:03 -0800 Subject: x86: Increase MAX_EARLY_RES; insufficient on 32-bit NUMA Due to recent changes wakeup and mptable, we run out of early reservations on 32-bit NUMA. Thus, adjust the available number. Signed-off-by: Yinghai Lu LKML-Reference: <4B22D754.2020706@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index f50447d961c0..05ed7ab2ca48 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -724,7 +724,7 @@ core_initcall(e820_mark_nvs_memory); /* * Early reserved memory areas. */ -#define MAX_EARLY_RES 20 +#define MAX_EARLY_RES 32 struct early_res { u64 start, end; -- cgit v1.2.3 From a4636818f8e0991f32d9528f39cf4f3d6a7d30a3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:29 -0600 Subject: cpumask: rename tsk_cpumask to tsk_cpus_allowed Noone uses this wrapper yet, and Ingo asked that it be kept consistent with current task_struct usage. (One user crept in via linux-next: fixed) Signed-off-by: Rusty Russell Cc: Tejun Heo --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..f125e5c551c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1136,7 +1136,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) return -ENOMEM; - cpumask_copy(oldmask, tsk_cpumask(current)); + cpumask_copy(oldmask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 244c287a5ac1..4d7adb282bdd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1555,7 +1555,7 @@ struct task_struct { }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) +#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT -- cgit v1.2.3 From 61c1917f47f73c968e92d04d15370b1dc3ec4592 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 05:40:33 +0100 Subject: perf events, x86/stacktrace: Make stack walking optional The current print_context_stack helper that does the stack walking job is good for usual stacktraces as it walks through all the stack and reports even addresses that look unreliable, which is nice when we don't have frame pointers for example. But we have users like perf that only require reliable stacktraces, and those may want a more adapted stack walker, so lets make this function a callback in stacktrace_ops that users can tune for their needs. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1261024834-5336-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 18 ++++++++++++++++++ arch/x86/kernel/cpu/perf_event.c | 1 + arch/x86/kernel/dumpstack.c | 9 +++++---- arch/x86/kernel/dumpstack.h | 6 ------ arch/x86/kernel/dumpstack_32.c | 2 +- arch/x86/kernel/dumpstack_64.c | 4 ++-- arch/x86/kernel/stacktrace.c | 18 ++++++++++-------- arch/x86/oprofile/backtrace.c | 9 +++++---- kernel/trace/trace_sysprof.c | 1 + 9 files changed, 43 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index cf86a5e73815..6c75151a3cca 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -5,6 +5,23 @@ extern int kstack_depth_to_print; int x86_is_stack_id(int id, char *name); +struct thread_info; +struct stacktrace_ops; + +typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, + unsigned long *stack, + unsigned long bp, + const struct stacktrace_ops *ops, + void *data, + unsigned long *end, + int *graph); + +extern unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + /* Generic stack tracer with callbacks */ struct stacktrace_ops { @@ -14,6 +31,7 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); + walk_stack_t walk_stack; }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 45506d5dd8df..d3802ee5a416 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2336,6 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack, }; #include "../dumpstack.h" diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0a0aa1cec8f1..8aaa119b7cad 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -141,10 +141,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, + .walk_stack = print_context_stack, }; void diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c227ab7..4fd1420faffa 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,12 +14,6 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif -extern unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0ed4c7abb62..ae775ca47b25 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -58,7 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index b13af53883aa..0ad9597073f5 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -188,8 +188,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + bp = ops->walk_stack(tinfo, stack, bp, ops, + data, estack_end, &graph); ops->stack(data, ""); /* * We link to the next stack via the diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c3eb207181fe..922eefbb3f6c 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) } static const struct stacktrace_ops save_stack_ops = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address, + .walk_stack = print_context_stack, }; static const struct stacktrace_ops save_stack_ops_nosched = { - .warning = save_stack_warning, - .warning_symbol = save_stack_warning_symbol, - .stack = save_stack_stack, - .address = save_stack_address_nosched, + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address_nosched, + .walk_stack = print_context_stack, }; /* diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 044897be021f..3855096c59b8 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, + .walk_stack = print_context_stack, }; struct frame_head { diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index f6693969287d..a7974a552ca9 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -93,6 +93,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, + .walk_stack = print_context_stack, }; static int -- cgit v1.2.3 From 06d65bda75341485d32f33da474b0664819ad497 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 05:40:34 +0100 Subject: perf events, x86/stacktrace: Fix performance/softlockup by providing a special frame pointer-only stack walker It's just wasteful for stacktrace users like perf to walk through every entries on the stack whereas these only accept reliable ones, ie: that the frame pointer validates. Since perf requires pure reliable stacktraces, it needs a stack walker based on frame pointers-only to optimize the stacktrace processing. This might solve some near-lockup scenarios that can be triggered by call-graph tracing timer events. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1261024834-5336-2-git-send-regression-fweisbec@gmail.com> [ v2: fix for modular builds and small detail tidyup ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 6 ++++++ arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/dumpstack.c | 28 ++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 6c75151a3cca..35e89122a42f 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -22,6 +22,12 @@ print_context_stack(struct thread_info *tinfo, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); +extern unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d3802ee5a416..c223b7e895d9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2336,7 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = { .warning_symbol = backtrace_warning_symbol, .stack = backtrace_stack, .address = backtrace_address, - .walk_stack = print_context_stack, + .walk_stack = print_context_stack_bp, }; #include "../dumpstack.h" diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8aaa119b7cad..c56bc2873030 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,6 +109,30 @@ print_context_stack(struct thread_info *tinfo, } return bp; } +EXPORT_SYMBOL_GPL(print_context_stack); + +unsigned long +print_context_stack_bp(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + unsigned long *ret_addr = &frame->return_address; + + while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + unsigned long addr = *ret_addr; + + if (__kernel_text_address(addr)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } + } + return (unsigned long)frame; +} +EXPORT_SYMBOL_GPL(print_context_stack_bp); static void @@ -143,8 +167,8 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, + .stack = print_trace_stack, + .address = print_trace_address, .walk_stack = print_context_stack, }; -- cgit v1.2.3 From 4beb3d6d144c41525541cce2b611858b2645c725 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 16 Dec 2009 17:39:48 -0800 Subject: x86: Don't use POSIX character classes in gen-insn-attr-x86.awk Not all awk implementations (including the default awk in Ubuntu 9.10) support POSIX character classes. Since x86-opcode-map.txt is plain ASCII, we can just use explicit ranges for lower case, alphabetic, and alphanumeric characters instead. Signed-off-by: Roland Dreier Acked-by: Masami Hiramatsu LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/tools/gen-insn-attr-x86.awk | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 7a6850683c34..eaf11f52fc0b 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -6,8 +6,6 @@ # Awk implementation sanity check function check_awk_implement() { - if (!match("abc", "[[:lower:]]+")) - return "Your awk doesn't support charactor-class." if (sprintf("%x", 0) != "0") return "Your awk has a printf-format problem." return "" @@ -44,12 +42,12 @@ BEGIN { delete gtable delete atable - opnd_expr = "^[[:alpha:]/]" + opnd_expr = "^[A-Za-z/]" ext_expr = "^\\(" sep_expr = "^\\|$" - group_expr = "^Grp[[:alnum:]]+" + group_expr = "^Grp[0-9A-Za-z]+" - imm_expr = "^[IJAO][[:lower:]]" + imm_expr = "^[IJAO][a-z]" imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" @@ -62,7 +60,7 @@ BEGIN { imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" - modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" + modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO -- cgit v1.2.3 From 04a1e62c2cec820501f93526ad1e46073b802dc4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2009 07:04:56 -0800 Subject: x86/ptrace: make genregs[32]_get/set more robust The loop condition is fragile: we compare an unsigned value to zero, and then decrement it by something larger than one in the loop. All the callers should be passing in appropriately aligned buffer lengths, but it's better to just not rely on it, and have some appropriate defensive loop limits. Acked-by: Roland McGrath Signed-off-by: Linus Torvalds --- arch/x86/kernel/ptrace.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2779321046bd..017d937639fe 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -509,14 +509,14 @@ static int genregs_get(struct task_struct *target, { if (kbuf) { unsigned long *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { *k++ = getreg(target, pos); count -= sizeof(*k); pos += sizeof(*k); } } else { unsigned long __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { if (__put_user(getreg(target, pos), u++)) return -EFAULT; count -= sizeof(*u); @@ -535,14 +535,14 @@ static int genregs_set(struct task_struct *target, int ret = 0; if (kbuf) { const unsigned long *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const unsigned long __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { unsigned long word; ret = __get_user(word, u++); if (ret) @@ -1458,14 +1458,14 @@ static int genregs32_get(struct task_struct *target, { if (kbuf) { compat_ulong_t *k = kbuf; - while (count > 0) { + while (count >= sizeof(*k)) { getreg32(target, pos, k++); count -= sizeof(*k); pos += sizeof(*k); } } else { compat_ulong_t __user *u = ubuf; - while (count > 0) { + while (count >= sizeof(*u)) { compat_ulong_t word; getreg32(target, pos, &word); if (__put_user(word, u++)) @@ -1486,14 +1486,14 @@ static int genregs32_set(struct task_struct *target, int ret = 0; if (kbuf) { const compat_ulong_t *k = kbuf; - while (count > 0 && !ret) { + while (count >= sizeof(*k) && !ret) { ret = putreg32(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const compat_ulong_t __user *u = ubuf; - while (count > 0 && !ret) { + while (count >= sizeof(*u) && !ret) { compat_ulong_t word; ret = __get_user(word, u++); if (ret) -- cgit v1.2.3 From 6c56ccecf05fafe100ab4ea94f6fccbf5ff00db7 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 17 Dec 2009 12:27:02 -0800 Subject: x86: Reenable TSC sync check at boot, even with NONSTOP_TSC Commit 83ce4009 did the following change If the TSC is constant and non-stop, also set it reliable. But, there seems to be few systems that will end up with TSC warp across sockets, depending on how the cpus come out of reset. Skipping TSC sync test on such systems may result in time inconsistency later. So, reenable TSC sync test even on constant and non-stop TSC systems. Set, sched_clock_stable to 1 by default and reset it in mark_tsc_unstable, if TSC sync fails. This change still gives perf benefit mentioned in 83ce4009 for systems where TSC is reliable. Signed-off-by: Venkatesh Pallipadi Acked-by: Suresh Siddha LKML-Reference: <20091217202702.GA18015@linux-os.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/intel.c | 1 - arch/x86/kernel/tsc.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9c31e8b09d2c..879666f4d871 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); sched_clock_stable = 1; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cd982f48e23e..597683aa5ba0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; + sched_clock_stable = 0; printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) -- cgit v1.2.3 From 8c63450718ea62ee3a70bffde170b4d15fc72d3c Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Thu, 17 Dec 2009 15:26:36 -0800 Subject: x86: Fix objdump version check in arch/x86/tools/chkobjdump.awk It says Warning: objdump version is older than 2.19 Warning: Skipping posttest. because it used the wrong field from `objdump -v': akpm:/usr/src/25> /opt/crosstool/gcc-4.0.2-glibc-2.3.6/x86_64-unknown-linux-gnu/bin/x86_64-unknown-linux-gnu-objdump -v GNU objdump 2.16.1 Copyright 2005 Free Software Foundation, Inc. This program is free software; you may redistribute it under the terms of the GNU General Public License. This program has absolutely no warranty. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton LKML-Reference: <200912172326.nBHNQaQl024796@imap1.linux-foundation.org> Signed-off-by: H. Peter Anvin Cc: Masami Hiramatsu --- arch/x86/tools/chkobjdump.awk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 0d13cd9fdcff..5bbb5a33f220 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -9,7 +9,7 @@ BEGIN { } /^GNU/ { - split($4, ver, "."); + split($3, ver, "."); if (ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; -- cgit v1.2.3 From 18374d89e5fe96772102f44f535efb1198d9be08 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 17 Dec 2009 18:29:46 -0800 Subject: x86, irq: Allow 0xff for /proc/irq/[n]/smp_affinity on an 8-cpu system John Blackwood reported: > on an older Dell PowerEdge 6650 system with 8 cpus (4 are hyper-threaded), > and 32 bit (x86) kernel, once you change the irq smp_affinity of an irq > to be less than all cpus in the system, you can never change really the > irq smp_affinity back to be all cpus in the system (0xff) again, > even though no error status is returned on the "/bin/echo ff > > /proc/irq/[n]/smp_affinity" operation. > > This is due to that fact that BAD_APICID has the same value as > all cpus (0xff) on 32bit kernels, and thus the value returned from > set_desc_affinity() via the cpu_mask_to_apicid_and() function is treated > as a failure in set_ioapic_affinity_irq_desc(), and no affinity changes > are made. set_desc_affinity() is already checking if the incoming cpu mask intersects with the cpu online mask or not. So there is no need for the apic op cpu_mask_to_apicid_and() to check again and return BAD_APICID. Remove the BAD_APICID return value from cpu_mask_to_apicid_and() and also fix set_desc_affinity() to return -1 instead of using BAD_APICID to represent error conditions (as cpu_mask_to_apicid_and() can return logical or physical apicid values and BAD_APICID is really to represent bad physical apic id). Reported-by: John Blackwood Root-caused-by: John Blackwood Signed-off-by: Suresh Siddha LKML-Reference: <1261103386.2535.409.camel@sbs-t61> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hw_irq.h | 3 ++- arch/x86/kernel/apic/apic_flat_64.c | 5 +---- arch/x86/kernel/apic/bigsmp_32.c | 5 +---- arch/x86/kernel/apic/io_apic.c | 32 ++++++++++++++------------------ arch/x86/kernel/apic/x2apic_cluster.c | 5 +---- arch/x86/kernel/apic/x2apic_phys.c | 5 +---- arch/x86/kernel/apic/x2apic_uv_x.c | 5 +---- arch/x86/kernel/uv_irq.c | 3 +-- 8 files changed, 22 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 08c48a81841f..eeac829a0f44 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -103,7 +103,8 @@ extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); struct irq_desc; -extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *, + unsigned int *dest_id); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index d0c99abc26c3..eacbd2b31d27 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -306,10 +306,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } struct apic apic_physflat = { diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 38dcecfa5818..cb804c5091b9 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -131,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return bigsmp_cpu_to_logical_apicid(cpu); - - return BAD_APICID; + return bigsmp_cpu_to_logical_apicid(cpu); } static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5d498fbee4b..98ced709e829 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2276,26 +2276,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq /* * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and * leaves desc->affinity untouched. */ unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, + unsigned int *dest_id) { struct irq_cfg *cfg; unsigned int irq; if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; + return -1; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; + return -1; cpumask_copy(desc->affinity, mask); - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); + return 0; } static int @@ -2311,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) cfg = desc->chip_data; spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { + ret = set_desc_affinity(desc, mask, &dest); + if (!ret) { /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); - ret = 0; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -3351,8 +3352,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3384,8 +3384,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) if (get_irte(irq, &irte)) return -1; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; irte.vector = cfg->vector; @@ -3567,8 +3566,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3623,8 +3621,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) struct msi_msg msg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; @@ -3730,8 +3727,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irq_cfg *cfg; unsigned int dest; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; cfg = desc->chip_data; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5371ec36776..cf69c59f4910 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a8989aadc99a..8972f38c5ced 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_phys_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b684bb303cbf..d56b0efb2057 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -225,10 +225,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - if (cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - - return BAD_APICID; + return per_cpu(x86_cpu_to_apicid, cpu); } static unsigned int x2apic_get_apic_id(unsigned long x) diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 61d805df4c91..ece73d8e3240 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -215,8 +215,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) unsigned long mmr_offset; unsigned mmr_pnode; - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) + if (set_desc_affinity(desc, mask, &dest)) return -1; mmr_value = 0; -- cgit v1.2.3 From 99e8c5a3b875a34d894a711c9a3669858d6adf45 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Dec 2009 01:33:54 +0100 Subject: hw-breakpoints: Fix hardware breakpoints -> perf events dependency The kbuild's select command doesn't propagate through the config dependencies. Hence the current rules of hardware breakpoint's config can't ensure perf can never be disabled under us. We have: config X86 selects HAVE_HW_BREAKPOINTS config HAVE_HW_BREAKPOINTS select PERF_EVENTS config PERF_EVENTS [...] x86 will select the breakpoints but that won't propagate to perf events. The user can still disable the latter, but it is necessary for the breakpoints. What we need is: - x86 selects HAVE_HW_BREAKPOINTS and PERF_EVENTS - HAVE_HW_BREAKPOINTS depends on PERF_EVENTS so that we ensure PERF_EVENTS is enabled and frozen for x86. This fixes the following kind of build errors: In file included from arch/x86/kernel/hw_breakpoint.c:31: include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr': include/linux/hw_breakpoint.h:39: error: 'struct perf_event' has no member named 'attr' v2: Select also ANON_INODES from x86, required for perf Reported-by: Cyrill Gorcunov Reported-by: Michal Marek Reported-by: Andrew Randrianasulu Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Randy Dunlap Cc: K.Prasad LKML-Reference: <1261010034-7786-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 +--- arch/x86/Kconfig | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index d82875820a15..9d055b4f0585 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -135,9 +135,7 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool - depends on HAVE_PERF_EVENTS - select ANON_INODES - select PERF_EVENTS + depends on PERF_EVENTS config HAVE_USER_RETURN_NOTIFIER bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3b2a5aca4edb..55298e891571 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,8 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_HW_BREAKPOINT + select PERF_EVENTS + select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER -- cgit v1.2.3 From 8bee738bb1979c8bf7b42716b772522ab7d26b0c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 18 Dec 2009 10:40:13 -0500 Subject: x86: Fix objdump version check in chkobjdump.awk for different formats. Different version of objdump says its version in different way; GNU objdump 2.16.1 or GNU objdump version 2.19.51.0.14-1.fc11 20090722 This patch uses the first argument which starts with a number as version string. Changes in v2: - Remove unneeded increment. Signed-off-by: Masami Hiramatsu LKML-Reference: <20091218154012.16960.5113.stgit@dhcp-100-2-132.bos.redhat.com> Suggested-by: H. Peter Anvin Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/tools/chkobjdump.awk | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index 5bbb5a33f220..fd1ab80be0de 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -8,14 +8,24 @@ BEGIN { od_sver = 19; } -/^GNU/ { - split($3, ver, "."); +/^GNU objdump/ { + verstr = "" + for (i = 3; i <= NF; i++) + if (match($(i), "^[0-9]")) { + verstr = $(i); + break; + } + if (verstr == "") { + printf("Warning: Failed to find objdump version number.\n"); + exit 0; + } + split(verstr, ver, "."); if (ver[1] > od_ver || (ver[1] == od_ver && ver[2] >= od_sver)) { exit 1; } else { printf("Warning: objdump version %s is older than %d.%d\n", - $4, od_ver, od_sver); + verstr, od_ver, od_sver); print("Warning: Skipping posttest."); # Logic is inverted, because we just skip test without error. exit 0; -- cgit v1.2.3 From 0f764806438d5576ac58898332e5dcf30bb8a679 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Dec 2009 15:51:23 +0100 Subject: x86/amd-iommu: Fix initialization failure panic The assumption that acpi_table_parse passes the return value of the hanlder function to the caller proved wrong recently. The return value of the handler function is totally ignored. This makes the initialization code for AMD IOMMU buggy in a way that could cause a kernel panic on initialization. This patch fixes the issue in the AMD IOMMU driver. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1dca9c34eaeb..fb490ce7dd55 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,11 @@ int amd_iommus_present; /* IOMMUs have a non-present cache? */ bool amd_iommu_np_cache __read_mostly; +/* + * Set to true if ACPI table parsing and hardware intialization went properly + */ +static bool amd_iommu_initialized; + /* * List of protection domains - used during resume */ @@ -929,6 +934,8 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); + amd_iommu_initialized = true; + return 0; } @@ -1263,6 +1270,9 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; + if (!amd_iommu_initialized) + goto free; + if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; -- cgit v1.2.3 From 1d9cb470a755409ce97c3376174b1e234bd20371 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:14 -0700 Subject: ACPI: processor: introduce arch_has_acpi_pdc arch dependent helper function that tells us if we should attempt to evaluate _PDC on this machine or not. The x86 implementation assumes that the CPUs in the machine must be homogeneous, and that you cannot mix CPUs of different vendors. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 2 ++ arch/x86/include/asm/acpi.h | 7 +++++++ arch/x86/kernel/acpi/processor.c | 4 +--- drivers/acpi/processor_pdc.c | 3 +++ 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 91df9686a0da..3b7888294648 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -132,6 +132,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS]; extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif +static inline bool arch_has_acpi_pdc(void) { return true; } + #define acpi_unlazy_tlb(x) #ifdef CONFIG_ACPI_NUMA diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 60d2b2db0bc5..d787e6e92bd1 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -142,6 +142,13 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) return max_cstate; } +static inline bool arch_has_acpi_pdc(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + return (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d85d1b2432ba..bcb6efe08c5d 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -79,9 +79,7 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL || - c->x86_vendor == X86_VENDOR_CENTAUR) - init_intel_pdc(pr, c); + init_intel_pdc(pr, c); return; } diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index b416c32dda04..931e735e9e37 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -69,6 +69,9 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) void acpi_processor_set_pdc(struct acpi_processor *pr) { + if (arch_has_acpi_pdc() == false) + return; + arch_acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); arch_acpi_processor_cleanup_pdc(pr); -- cgit v1.2.3 From 407cd87c54e76c266245e8faef8dd4a84b7254fe Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:19 -0700 Subject: ACPI: processor: unify arch_acpi_processor_init_pdc The x86 and ia64 implementations of arch_acpi_processor_init_pdc() are almost exactly the same. The only difference is in what bits they set in obj_list buffer. Combine the boilerplate memory management code, and leave the arch-specific bit twiddling in separate implementations. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/acpi-processor.c | 34 +---------------------------- arch/x86/kernel/acpi/processor.c | 34 +---------------------------- drivers/acpi/processor_pdc.c | 45 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 67 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index dbda7bde6112..ab72d46f52c6 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -16,31 +16,7 @@ static void init_intel_pdc(struct acpi_processor *pr) { - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } + u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; @@ -52,20 +28,12 @@ static void init_intel_pdc(struct acpi_processor *pr) */ buf[2] |= ACPI_PDC_SMP_T_SWCOORD; - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pr->pdc = obj_list; - return; } /* Initialize _PDC data based on the CPU vendor */ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { - pr->pdc = NULL; init_intel_pdc(pr); return; } diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index bcb6efe08c5d..967860b43f2a 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -14,31 +14,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) { - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } + u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; @@ -62,13 +38,6 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MWAIT)) buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pr->pdc = obj_list; - return; } @@ -78,7 +47,6 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { struct cpuinfo_x86 *c = &cpu_data(pr->id); - pr->pdc = NULL; init_intel_pdc(pr, c); return; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 931e735e9e37..87946b6da765 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -33,6 +33,49 @@ static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { {}, }; +static void acpi_processor_init_pdc(struct acpi_processor *pr) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + pr->pdc = NULL; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + /* Now let the arch do the bit-twiddling to buf[] */ + arch_acpi_processor_init_pdc(pr); + + return; +} + /* * _PDC is required for a BIOS-OS handshake for most of the newer * ACPI processor features. @@ -72,7 +115,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) if (arch_has_acpi_pdc() == false) return; - arch_acpi_processor_init_pdc(pr); + acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); arch_acpi_processor_cleanup_pdc(pr); } -- cgit v1.2.3 From 08ea48a326d8030ef5b7fb02292faf5a53c95e0a Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:24 -0700 Subject: ACPI: processor: factor out common _PDC settings Both x86 and ia64 initialize _PDC with mostly common bit settings. Factor out the common settings and leave the arch-specific ones alone. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/acpi-processor.c | 10 +--------- arch/x86/kernel/acpi/processor.c | 10 +--------- drivers/acpi/processor_pdc.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ab72d46f52c6..ebe23f58bd6e 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -18,15 +18,7 @@ static void init_intel_pdc(struct acpi_processor *pr) { u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; - /* - * The default of PDC_SMP_T_SWCOORD bit is set for IA64 cpu so - * that OSPM is capable of native ACPI throttling software - * coordination using BIOS supplied _TSD info. - */ - buf[2] |= ACPI_PDC_SMP_T_SWCOORD; + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; return; } diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 967860b43f2a..d722ca8cb4c7 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -16,16 +16,8 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) { u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - /* - * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so - * that OSPM is capable of native ACPI throttling software - * coordination using BIOS supplied _TSD info. - */ - buf[2] |= ACPI_PDC_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 87946b6da765..ccda7c95d073 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -33,6 +33,15 @@ static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { {}, }; +static void acpi_set_pdc_bits(u32 *buf) +{ + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + + /* Enable coordination with firmware's _TSD info */ + buf[2] = ACPI_PDC_SMP_T_SWCOORD; +} + static void acpi_processor_init_pdc(struct acpi_processor *pr) { struct acpi_object_list *obj_list; @@ -63,6 +72,8 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) return; } + acpi_set_pdc_bits(buf); + obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; obj->buffer.pointer = (u8 *) buf; -- cgit v1.2.3 From 6c5807d7bc7d051fce00863ffb98d36325501eb2 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:29 -0700 Subject: ACPI: processor: finish unifying arch_acpi_processor_init_pdc() The only thing arch-specific about calling _PDC is what bits get set in the input obj_list buffer. There's no need for several levels of indirection to twiddle those bits. Additionally, since we're just messing around with a buffer, we can simplify the interface; no need to pass around the entire struct acpi_processor * just to get at the buffer. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 4 ++++ arch/ia64/kernel/acpi-processor.c | 18 ------------------ arch/x86/include/asm/acpi.h | 19 +++++++++++++++++++ arch/x86/kernel/acpi/processor.c | 34 ---------------------------------- drivers/acpi/processor_pdc.c | 6 +++--- include/acpi/processor.h | 1 - 6 files changed, 26 insertions(+), 56 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 3b7888294648..7ae58892ba8d 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -133,6 +133,10 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; +} #define acpi_unlazy_tlb(x) diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ebe23f58bd6e..7ba5accebf66 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -14,24 +14,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - init_intel_pdc(pr); - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index d787e6e92bd1..56f462cf22d2 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -149,6 +149,25 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d722ca8cb4c7..0f57307f8224 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -12,40 +12,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - - if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; - - if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; - - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - return; -} - - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - struct cpuinfo_x86 *c = &cpu_data(pr->id); - - init_intel_pdc(pr, c); - - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index ccda7c95d073..48df08ebcec4 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -40,6 +40,9 @@ static void acpi_set_pdc_bits(u32 *buf) /* Enable coordination with firmware's _TSD info */ buf[2] = ACPI_PDC_SMP_T_SWCOORD; + + /* Twiddle arch-specific bits needed for _PDC */ + arch_acpi_set_pdc_bits(buf); } static void acpi_processor_init_pdc(struct acpi_processor *pr) @@ -81,9 +84,6 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) obj_list->pointer = obj; pr->pdc = obj_list; - /* Now let the arch do the bit-twiddling to buf[] */ - arch_acpi_processor_init_pdc(pr); - return; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index a1b748a7a766..50edd734aec6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,7 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_init_pdc(struct acpi_processor *pr); void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT -- cgit v1.2.3 From 47817254b8637b56730aec26eed2c337d3938bb5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:34 -0700 Subject: ACPI: processor: unify arch_acpi_processor_cleanup_pdc The x86 and ia64 implementations of the function in $subject are exactly the same. Also, since the arch-specific implementations of setting _PDC have been completely hollowed out, remove the empty shells. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/Makefile | 4 ---- arch/ia64/kernel/acpi-processor.c | 27 --------------------------- arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/processor.c | 25 ------------------------- drivers/acpi/processor_pdc.c | 21 ++++++++++++++++++++- include/acpi/processor.h | 2 -- 6 files changed, 21 insertions(+), 60 deletions(-) delete mode 100644 arch/ia64/kernel/acpi-processor.c delete mode 100644 arch/x86/kernel/acpi/processor.c (limited to 'arch/x86') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 2a75e937ae8d..e1236349c99f 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -18,10 +18,6 @@ obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += acpi-processor.o -endif - obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c deleted file mode 100644 index 7ba5accebf66..000000000000 --- a/arch/ia64/kernel/acpi-processor.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * arch/ia64/kernel/acpi-processor.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o +obj-y += cstate.o endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index 0f57307f8224..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 48df08ebcec4..e786e2ce1882 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -1,3 +1,12 @@ +/* + * Copyright (C) 2005 Intel Corporation + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Alex Chiang + * - Unified x86/ia64 implementations + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ #include #include @@ -121,6 +130,16 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) return status; } +static void acpi_processor_cleanup_pdc(struct acpi_processor *pr) +{ + if (pr->pdc) { + kfree(pr->pdc->pointer->buffer.pointer); + kfree(pr->pdc->pointer); + kfree(pr->pdc); + pr->pdc = NULL; + } +} + void acpi_processor_set_pdc(struct acpi_processor *pr) { if (arch_has_acpi_pdc() == false) @@ -128,7 +147,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); + acpi_processor_cleanup_pdc(pr); } EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 50edd734aec6..0873cd57510c 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,8 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); - #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); -- cgit v1.2.3 From 4a28395d72a956f2dad24e343d06bc08c9afb89a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 21 Dec 2009 16:19:58 -0800 Subject: arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c: avoid cross-CPU interrupts by using smp_call_function_any() Presently acpi-cpufreq will perform the MSR read on the first CPU in the mask. That's inefficient if that CPU differs from the current CPU. Because we have to perform a cross-CPU call, but we could have run the rdmsr on the current CPU. So switch to using the new smp_call_function_any(), which will perform the call on the current CPU if that CPU is present in the mask (it is). Cc: "Zhang, Yanmin" Cc: Dave Jones Cc: Ingo Molnar Cc: Jaswinder Singh Rajput Cc: Len Brown Cc: Mike Galbraith Cc: Rusty Russell Cc: Thomas Gleixner Cc: Venkatesh Pallipadi Cc: Zhao Yakui Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index f28decf8dde3..1b1920fa7c80 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -190,9 +190,11 @@ static void do_drv_write(void *_cmd) static void drv_read(struct drv_cmd *cmd) { + int err; cmd->val = 0; - smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); + err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); + WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ } static void drv_write(struct drv_cmd *cmd) -- cgit v1.2.3 From 2f99f5c8f05e02f3df1bb4d93b6704e6f5972872 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Dec 2009 15:04:53 -0800 Subject: Revert "x86, ucode-amd: Ensure ucode update on suspend/resume after CPU off/online cycle" This reverts commit 9f15226e75583547aaf542c6be4bdac1060dd425. It's just wrong, and broke resume for Rafael even on a non-AMD CPU. As Rafael says: "... it causes microcode_init_cpu() to be called during resume even for CPUs for which there's no microcode to apply. That, in turn, results in executing request_firmware() (on Intel CPUs at least) which doesn't work at this stage of resume (we have device interrupts disabled, I/O devices are still suspended and so on). If I'm not mistaken, the "if (uci->valid)" logic means "if that CPU is known to us" , so before commit 9f15226e755 microcode_resume_cpu() was called for all CPUs already in the system during suspend, which was the right thing to do. The commit changed it so that the CPUs without microcode to apply are now treated as "unknown", which is not quite right. The problem this commit attempted to solve has to be handled differently." Bisected-and -requested-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- arch/x86/kernel/microcode_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 844c02c65fcb..0c8632433090 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -394,7 +394,7 @@ static enum ucode_state microcode_update_cpu(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; enum ucode_state ustate; - if (uci->valid && uci->mc) + if (uci->valid) ustate = microcode_resume_cpu(cpu); else ustate = microcode_init_cpu(cpu); -- cgit v1.2.3 From 17a2a9b57a9a7d2fd8f97df951b5e63e0bd56ef5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 25 Dec 2009 15:40:38 -0800 Subject: x86, compress: Force i386 instructions for the decompressor Recently, some distros have started shipping versions of gcc which default to -march=i686. This breaks building kernels for pre-i686 machines, even if they have been selected in Kconfig, due to the generation of CMOV instructions. There isn't enough benefit to try to preserve the generation of these instructions even when selected, so simply force -march=i386 for the decompressor when building a 32-bit kernel. Reported-and-tested-by: Chris Rankin Signed-off-by: H. Peter Anvin LKML-Reference: <219280.97558.qm@web52907.mail.re2.yahoo.com> --- arch/x86/boot/compressed/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f8ed0658404c..f25bbd37765a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -9,6 +9,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinu KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) -- cgit v1.2.3 From fb341f572d26e0786167cd96b90cc4febed830cf Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 5 Dec 2009 12:34:11 -0200 Subject: KVM: MMU: remove prefault from invlpg handler The invlpg prefault optimization breaks Windows 2008 R2 occasionally. The visible effect is that the invlpg handler instantiates a pte which is, microseconds later, written with a different gfn by another vcpu. The OS could have other mechanisms to prevent a present translation from being used, which the hypervisor is unaware of. While the documentation states that the cpu is at liberty to prefetch tlb entries, it looks like this is not heeded, so remove tlb prefetch from invlpg. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a6017132fba8..58a0f1e88596 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -455,8 +455,6 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; - pt_element_t gpte; - gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -470,10 +468,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - pte_gpa = (sp->gfn << PAGE_SHIFT); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -492,18 +486,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); spin_unlock(&vcpu->kvm->mmu_lock); - - if (pte_gpa == -1) - return; - if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, - sizeof(pt_element_t))) - return; - if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) { - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte, - sizeof(pt_element_t), 0); - } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) -- cgit v1.2.3 From 6e24a6eff4571002cd48b99a2b92dc829ce39cb9 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 14 Dec 2009 17:37:35 -0200 Subject: KVM: LAPIC: make sure IRR bitmap is scanned after vm load The vcpus are initialized with irr_pending set to false, but loading the LAPIC registers with pending IRR fails to reset the irr_pending variable. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cd60c0bd1b32..3063a0c4858b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1150,6 +1150,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); start_apic_timer(apic); + apic->irr_pending = true; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From dab4b911a5327859bb8f969249c6978c26cd4853 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 6 Dec 2009 18:24:15 +0100 Subject: KVM: x86: Extend KVM_SET_VCPU_EVENTS with selective updates User space may not want to overwrite asynchronously changing VCPU event states on write-back. So allow to skip nmi.pending and sipi_vector by setting corresponding bits in the flags field of kvm_vcpu_events. [avi: advertise the bits in KVM_GET_VCPU_EVENTS] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- Documentation/kvm/api.txt | 10 +++++++++- arch/x86/include/asm/kvm.h | 4 ++++ arch/x86/kvm/x86.c | 12 ++++++++---- 3 files changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index e1a114161027..2811e452f756 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -685,7 +685,7 @@ struct kvm_vcpu_events { __u8 pad; } nmi; __u32 sipi_vector; - __u32 flags; /* must be zero */ + __u32 flags; }; 4.30 KVM_SET_VCPU_EVENTS @@ -701,6 +701,14 @@ vcpu. See KVM_GET_VCPU_EVENTS for the data structure. +Fields that may be modified asynchronously by running VCPUs can be excluded +from the update. These fields are nmi.pending and sipi_vector. Keep the +corresponding bits in the flags field cleared to suppress overwriting the +current in-kernel state. The bits are: + +KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel +KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector + 5. The kvm_run structure diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 950df434763f..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -254,6 +254,10 @@ struct kvm_reinject_control { __u8 reserved[31]; }; +/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ +#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 +#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9d068966fb2a..6651dbf58675 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1913,7 +1913,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; - events->flags = 0; + events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR); vcpu_put(vcpu); } @@ -1921,7 +1922,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { - if (events->flags) + if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING + | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) return -EINVAL; vcpu_load(vcpu); @@ -1938,10 +1940,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_pic_clear_isr_ack(vcpu->kvm); vcpu->arch.nmi_injected = events->nmi.injected; - vcpu->arch.nmi_pending = events->nmi.pending; + if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) + vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) + vcpu->arch.sipi_vector = events->sipi_vector; vcpu_put(vcpu); -- cgit v1.2.3 From d015a092989d673df44a5ad6866dc5d5006b7a2a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 28 Dec 2009 10:26:59 +0200 Subject: x86: Use KERN_DEFAULT log-level in __show_regs() Andrew Morton reported a strange looking kmemcheck warning: WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88004fba6c20) 0000000000000000310000000000000000000000000000002413000000c9ffff u u u u u u u u u u u u u u u u i i i i i i i i u u u u u u u u [] kmemleak_scan+0x25a/0x540 [] kmemleak_scan_thread+0x5b/0xe0 [] kthread+0x9e/0xb0 [] kernel_thread_helper+0x4/0x10 [] 0xffffffffffffffff The above printout is missing register dump completely. The problem here is that the output comes from syslog which doesn't show KERN_INFO log-level messages. We didn't see this before because both of us were testing on 32-bit kernels which use the _default_ log-level. Fix that up by explicitly using KERN_DEFAULT log-level for __show_regs() printks. Signed-off-by: Pekka Enberg Cc: Vegard Nossum Cc: Andrew Morton Cc: Arjan van de Ven Cc: Linus Torvalds LKML-Reference: <1261988819.4641.2.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 4 ++-- arch/x86/kernel/process_32.c | 14 +++++++------- arch/x86/kernel/process_64.c | 24 ++++++++++++------------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 98c2cdeb599e..c6ee241c8a98 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -103,8 +103,8 @@ void show_regs_common(void) if (!product) product = ""; - printk("\n"); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", + printk(KERN_CONT "\n"); + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9c517b5858f0..37ad1e046aae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -139,16 +139,16 @@ void __show_regs(struct pt_regs *regs, int all) show_regs_common(); - printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, smp_processor_id()); print_symbol("EIP is at %s\n", regs->ip); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); if (!all) @@ -158,19 +158,19 @@ void __show_regs(struct pt_regs *regs, int all) cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); get_debugreg(d3, 3); - printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", + printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", + printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", d6, d7); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 52fbd0c60198..f9e033150cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -161,19 +161,19 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int ds, cs, es; show_regs_common(); - printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, regs->flags); - printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", regs->r10, regs->r11, regs->r12); - printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); asm("movl %%ds,%0" : "=r" (ds)); @@ -194,21 +194,21 @@ void __show_regs(struct pt_regs *regs, int all) cr3 = read_cr3(); cr4 = read_cr4(); - printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", fs, fsindex, gs, gsindex, shadowgs); - printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) -- cgit v1.2.3 From c0ca9da442df82b67095f230f24762042f9f3b7d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 28 Dec 2009 11:02:15 +0200 Subject: x86, kmemcheck: Use KERN_WARNING for error reporting As suggested by Vegard Nossum, use KERN_WARNING for error reporting to make sure kmemcheck reports end up in syslog. Suggested-by: Vegard Nossum Signed-off-by: Pekka Enberg Cc: Andrew Morton LKML-Reference: <1261990935.4641.7.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmemcheck/error.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index 4901d0dafda6..af3b6c8a436f 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -106,26 +106,25 @@ void kmemcheck_error_recall(void) switch (e->type) { case KMEMCHECK_ERROR_INVALID_ACCESS: - printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " - "from %s memory (%p)\n", + printk(KERN_WARNING "WARNING: kmemcheck: Caught %d-bit read from %s memory (%p)\n", 8 * e->size, e->state < ARRAY_SIZE(desc) ? desc[e->state] : "(invalid shadow state)", (void *) e->address); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) - printk("%02x", e->memory_copy[i]); - printk("\n"); + printk(KERN_CONT "%02x", e->memory_copy[i]); + printk(KERN_CONT "\n"); - printk(KERN_INFO); + printk(KERN_WARNING); for (i = 0; i < SHADOW_COPY_SIZE; ++i) { if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) - printk(" %c", short_desc[e->shadow_copy[i]]); + printk(KERN_CONT " %c", short_desc[e->shadow_copy[i]]); else - printk(" ?"); + printk(KERN_CONT " ?"); } - printk("\n"); - printk(KERN_INFO "%*c\n", 2 + 2 + printk(KERN_CONT "\n"); + printk(KERN_WARNING "%*c\n", 2 + 2 * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); break; case KMEMCHECK_ERROR_BUG: -- cgit v1.2.3 From 39d30770992895d55789de64bad2349510af68d0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 28 Dec 2009 13:28:25 -0800 Subject: x86: SGI UV: Fix writes to led registers on remote uv hubs The wrong address was being used to write the SCIR led regs on remote hubs. Also, there was an inconsistency between how BIOS and the kernel indexed these regs. Standardize on using the lower 6 bits of the APIC ID as the index. This patch fixes the problem of writing to an errant address to a cpu # >= 64. Signed-off-by: Mike Travis Reviewed-by: Jack Steiner Cc: Robin Holt Cc: Linus Torvalds Cc: stable@kernel.org LKML-Reference: <4B3922F9.3060905@sgi.com> [ v2: fix a number of annoying checkpatch artifacts and whitespace noise ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 86 +++++++++++++++++++++----------------- arch/x86/kernel/apic/x2apic_uv_x.c | 12 +++--- 2 files changed, 54 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfabc80b7..bc54fa965af3 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -31,20 +31,20 @@ * contiguous (although various IO spaces may punch holes in * it).. * - * N - Number of bits in the node portion of a socket physical - * address. + * N - Number of bits in the node portion of a socket physical + * address. * - * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of - * routers always have low bit of 1, C/MBricks have low bit - * equal to 0. Most addressing macros that target UV hub chips - * right shift the NASID by 1 to exclude the always-zero bit. - * NASIDs contain up to 15 bits. + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. * * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead * of nasids. * - * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant - * of the nasid for socket usage. + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. * * * NumaLink Global Physical Address Format: @@ -71,12 +71,12 @@ * * * APICID format - * NOTE!!!!!! This is the current format of the APICID. However, code - * should assume that this will change in the future. Use functions - * in this file for all APICID bit manipulations and conversion. + * NOTE!!!!!! This is the current format of the APICID. However, code + * should assume that this will change in the future. Use functions + * in this file for all APICID bit manipulations and conversion. * - * 1111110000000000 - * 5432109876543210 + * 1111110000000000 + * 5432109876543210 * pppppppppplc0cch * sssssssssss * @@ -89,9 +89,9 @@ * Note: Processor only supports 12 bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. * - * Unless otherwise specified, all references to APICID refer to - * the FULL value contained in ACPI tables, not the subset in the - * processor APICID register. + * Unless otherwise specified, all references to APICID refer to + * the FULL value contained in ACPI tables, not the subset in the + * processor APICID register. */ @@ -151,16 +151,16 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* * Local & Global MMR space macros. - * Note: macros are intended to be used ONLY by inline functions - * in this file - not by other kernel code. - * n - NASID (full 15-bit global nasid) - * g - GNODE (full 15-bit global nasid, right shifted 1) - * p - PNODE (local part of nsids, right shifted 1) + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) */ #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) @@ -215,8 +215,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); /* * Macros for converting between kernel virtual addresses, socket local physical * addresses, and UV global physical addresses. - * Note: use the standard __pa() & __va() macros for converting - * between socket virtual and socket physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. */ /* socket phys RAM --> UV global physical address */ @@ -287,21 +287,18 @@ static inline int uv_apicid_to_pnode(int apicid) * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ -static inline unsigned long *uv_global_mmr32_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr32_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR32_BASE | UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr32(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr32_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr32(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset) { return readq(uv_global_mmr32_address(pnode, offset)); } @@ -310,21 +307,18 @@ static inline unsigned long uv_read_global_mmr32(int pnode, * Access Global MMR space using the MMR space located at the top of physical * memory. */ -static inline unsigned long *uv_global_mmr64_address(int pnode, - unsigned long offset) +static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) { return __va(UV_GLOBAL_MMR64_BASE | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); } -static inline void uv_write_global_mmr64(int pnode, unsigned long offset, - unsigned long val) +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, unsigned long val) { writeq(val, uv_global_mmr64_address(pnode, offset)); } -static inline unsigned long uv_read_global_mmr64(int pnode, - unsigned long offset) +static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset) { return readq(uv_global_mmr64_address(pnode, offset)); } @@ -338,6 +332,16 @@ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long o return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); } +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) +{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -457,11 +461,17 @@ static inline void uv_set_scir_bits(unsigned char value) } } +static inline unsigned long uv_scir_offset(int apicid) +{ + return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); +} + static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_write_global_mmr8(uv_cpu_to_pnode(cpu), + uv_cpu_hub_info(cpu)->scir.offset, value); uv_cpu_hub_info(cpu)->scir.state = value; - uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); } } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb2057..5f92494dab61 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ -- cgit v1.2.3 From 9a7262a0563da6b91019156abf487bcdf1a41526 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 28 Dec 2009 13:28:25 -0800 Subject: x86_64 SGI UV: Fix writes to led registers on remote uv hubs. The wrong address was being used to write the SCIR led regs on remote hubs. Also, there was an inconsistency between how BIOS and the kernel indexed these regs. Standardize on using the lower 6 bits of the APIC ID as the index. This patch fixes the problem of writing to an errant address to a cpu # >= 64. Signed-off-by: Mike Travis Reviewed-by: Jack Steiner Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uv/uv_hub.h | 20 +++++++++++++++++++- arch/x86/kernel/apic/x2apic_uv_x.c | 12 ++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 811bfabc80b7..bcdb708993d2 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -338,6 +338,18 @@ static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long o return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); } +static inline void uv_write_global_mmr8(int pnode, unsigned long offset, + unsigned char val) +{ + writeb(val, uv_global_mmr64_address(pnode, offset)); +} + +static inline unsigned char uv_read_global_mmr8(int pnode, + unsigned long offset) +{ + return readb(uv_global_mmr64_address(pnode, offset)); +} + /* * Access hub local MMRs. Faster than using global space but only local MMRs * are accessible. @@ -457,11 +469,17 @@ static inline void uv_set_scir_bits(unsigned char value) } } +static inline unsigned long uv_scir_offset(int apicid) +{ + return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f); +} + static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) { if (uv_cpu_hub_info(cpu)->scir.state != value) { + uv_write_global_mmr8(uv_cpu_to_pnode(cpu), + uv_cpu_hub_info(cpu)->scir.offset, value); uv_cpu_hub_info(cpu)->scir.state = value; - uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); } } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb2057..5f92494dab61 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 29 +++++++++++++++++------------ include/linux/acpi.h | 1 + 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9dff113..736d45602886 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379ab7bf..79d33d908b5a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 48b5ba9cc98d676712da29d9931f1c88e5185ff2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 05:53:02 +0100 Subject: perf: Pass appropriate frame pointer to dump_trace() Pass the frame pointer from the regs of the interrupted path to dump_trace() while processing the stack trace. Currently, dump_trace() takes the current bp and starts the callchain from dump_trace() itself. This is wasteful because we need to walk through the entire NMI/DEBUG stack before retrieving the interrupted point. We can fix that by just using the frame pointer from the captured regs. It points exactly where we want to start. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262235183-5320-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras --- arch/x86/kernel/cpu/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c223b7e895d9..d616c06e99b4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2347,7 +2347,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, PERF_CONTEXT_KERNEL); callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } /* -- cgit v1.2.3 From 9dad0fd5a73d4048dff18069733c0b515f68df74 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Dec 2009 15:40:39 -0800 Subject: x86: Fix size for ex trampoline with 32bit fix for error that is introduced by | x86: Use find_e820() instead of hard coded trampoline address it should end with PAGE_SIZE + PAGE_SIZE Signed-off-by: Yinghai Lu LKML-Reference: <1261525263-13763-2-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 05ed7ab2ca48..a1a7876cadcb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -733,13 +733,13 @@ struct early_res { }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE) /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, + { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 }, #endif {} -- cgit v1.2.3 From a557aae29cf5916295c234d4b10ba3f8f29b8a96 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 22 Dec 2009 15:40:40 -0800 Subject: x86/pci: Intel ioh bus num reg accessing fix It is above 0x100 (PCI-Express extended register space), so if mmconf is not enable, we can't access it. [ hpa: changed the bound from 0x200 to 0x120, which is the tight bound. ] Reported-by: Jens Axboe Signed-off-by: Yinghai Lu LKML-Reference: <1261525263-13763-3-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/pci/intel_bus.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c index b7a55dc55d13..f81a2fa8fe25 100644 --- a/arch/x86/pci/intel_bus.c +++ b/arch/x86/pci/intel_bus.c @@ -49,6 +49,10 @@ static void __devinit pci_root_bus_res(struct pci_dev *dev) u64 mmioh_base, mmioh_end; int bus_base, bus_end; + /* some sys doesn't get mmconf enabled */ + if (dev->cfg_size < 0x120) + return; + if (pci_root_num >= PCI_ROOT_NR) { printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); return; -- cgit v1.2.3 From f4b825bde98938f160315d655597bc9731521cae Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 5 Jan 2010 12:48:49 +1030 Subject: Revert "x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium" This reverts commit ae1b22f6e46c03cede7cea234d0bf2253b4261cf. As Linus said in 982d007a6ee: "There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully." This breaks lguest for those configs, but we can fix that by emulating if we have to. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14884 Signed-off-by: Rusty Russell LKML-Reference: <201001051248.49700.rusty@rustcorp.com.au> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 08e442bc3ab9..f20ddf84a893 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -396,7 +396,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on !M386 && !M486 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 409d02ef6d74f5e91f5ea4c587b2ee1375f106fc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 5 Jan 2010 14:19:11 +0100 Subject: x86: copy_from_user() should not return -EFAULT Callers of copy_from_user() expect it to return the number of bytes it could not copy. In no case it is supposed to return -EFAULT. In case of a detected buffer overflow just return the requested length. In addition one could think of a memset that would clear the size of the target object. [ hpa: code is not in .32 so not needed for -stable ] Signed-off-by: Heiko Carstens Acked-by: Arjan van de Ven LKML-Reference: <20100105131911.GC5480@osiris.boeblingen.de.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess_32.h | 5 ++--- arch/x86/include/asm/uaccess_64.h | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 0c9825e97f36..088d09fb1615 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -205,14 +205,13 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; if (likely(sz == -1 || sz >= n)) - ret = _copy_from_user(to, from, n); + n = _copy_from_user(to, from, n); else copy_from_user_overflow(); - return ret; + return n; } long __must_check strncpy_from_user(char *dst, const char __user *src, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 46324c6a4f6e..535e421498f6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -30,16 +30,15 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; might_fault(); if (likely(sz == -1 || sz >= n)) - ret = _copy_from_user(to, from, n); + n = _copy_from_user(to, from, n); #ifdef CONFIG_DEBUG_VM else WARN(1, "Buffer overflow detected!\n"); #endif - return ret; + return n; } static __always_inline __must_check -- cgit v1.2.3 From db677ffa5f5a4f15b9dad4d132b3477b80766d82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 5 Jan 2010 12:48:49 +1030 Subject: Revert "x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium" This reverts commit ae1b22f6e46c03cede7cea234d0bf2253b4261cf. As Linus said in 982d007a6ee: "There was something really messy about cmpxchg8b and clone CPU's, so if you enable it on other CPUs later, do it carefully." This breaks lguest for those configs, but we can fix that by emulating if we have to. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14884 Signed-off-by: Rusty Russell Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 08e442bc3ab9..f20ddf84a893 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -396,7 +396,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on !M386 && !M486 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 7f41c2e1523f628cc248e34192162aec5728bed7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 6 Jan 2010 10:56:31 -0800 Subject: x86, irq: Check move_in_progress before freeing the vector mapping With the recent irq migration fixes (post 2.6.32), Gary Hade has noticed "No IRQ handler for vector" messages during the 2.6.33-rc1 kernel boot on IBM AMD platforms and root caused the issue to this commit: > commit 23359a88e7eca3c4f402562b102f23014db3c2aa > Author: Suresh Siddha > Date: Mon Oct 26 14:24:33 2009 -0800 > > x86: Remove move_cleanup_count from irq_cfg As part of this patch, we have removed the move_cleanup_count check in smp_irq_move_cleanup_interrupt(). With this change, we can run into a situation where an irq cleanup interrupt on a cpu can cleanup the vector mappings associated with multiple irqs, of which one of the irq's migration might be still in progress. As such when that irq hits the old cpu, we get the "No IRQ handler" messages. Fix this by checking for the irq_cfg's move_in_progress and if the move is still in progress delay the vector cleanup to another irq cleanup interrupt request (which will happen when the irq starts arriving at the new cpu destination). Reported-and-tested-by: Gary Hade Signed-off-by: Suresh Siddha LKML-Reference: <1262804191.2732.7.camel@sbs-t61.sc.intel.com> Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c4619a55..53243ca7816d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2434,6 +2434,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); raw_spin_lock(&desc->lock); + /* + * Check if the irq migration is in progress. If so, we + * haven't received the cleanup request yet for this irq. + */ + if (cfg->move_in_progress) + goto unlock; + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; -- cgit v1.2.3 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch(), doesn't care care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 22 ++++++---------------- include/linux/acpi.h | 6 +++--- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6a..036d28adf59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * if acpi_blacklisted() acpi_disabled = 1; * acpi_irq_model=... * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure */ -int __init acpi_boot_table_init(void) +void __init acpi_boot_table_init(void) { - int error; - dmi_check_system(acpi_dmi_table); /* @@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return; /* * Initialize the ACPI boot-time table parser. */ - error = acpi_table_init(); - if (error) { + if (acpi_table_init()) { disable_acpi(); - return error; + return; } acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void) /* * blacklist may disable ACPI entirely */ - error = acpi_blacklisted(); - if (error) { + if (acpi_blacklisted()) { if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return error; + return; } } - - return 0; } int __init early_acpi_boot_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/mn10300/mm/init.c | 3 +-- arch/score/mm/init.c | 2 +- arch/x86/mm/init_32.c | 3 +-- include/linux/highmem.h | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 52c40d155672..a04ffbbbe253 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -616,7 +616,7 @@ void __init mem_init(void) "%dK data, %dK init, %luK highmem)\n", nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9e8d00389eef..1651942f7feb 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -424,7 +424,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f61c164d1e67..bc1297109cc5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -505,5 +505,5 @@ void __init mem_init(void) (num_physpages - tmp) << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index ec1420562dc7..dd27a9a35152 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -118,8 +118,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10)) - ); + totalhigh_pages << (PAGE_SHIFT - 10)); } /* diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 8c15b2c85d5a..dfaf458d6702 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -106,7 +106,7 @@ void __init mem_init(void) ram << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..9a0c258a86be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -892,8 +892,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + totalhigh_pages << (PAGE_SHIFT-10)); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From 13510997d600a076e064f10587a8f6d20f8fff41 Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:45 -0800 Subject: x86: add support for LZO-compressed kernels The necessary changes to the x86 Kconfig and boot/compressed to allow the use of this new compression method Signed-off-by: Albin Tonnerre Acked-by: H. Peter Anvin Tested-by: Wu Zhangjin Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/boot/compressed/Makefile | 5 ++++- arch/x86/boot/compressed/misc.c | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55298e891571..6bf1f1ac478c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,6 +49,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO select HAVE_HW_BREAKPOINT select PERF_EVENTS select ANON_INODES diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f25bbd37765a..fbb47daf2459 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.lzo head_$(BITS).o misc.o piggy.o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -49,10 +49,13 @@ $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzo) suffix-$(CONFIG_KERNEL_GZIP) := gz suffix-$(CONFIG_KERNEL_BZIP2) := bz2 suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_LZO) := lzo quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 842b2a36174a..3b22fe8ab91b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -162,6 +162,10 @@ static int lines, cols; #include "../../../../lib/decompress_unlzma.c" #endif +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + static void scroll(void) { int i; -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++++ include/linux/poison.h | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bf1f1ac478c..cbcbfdee3ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + source "mm/Kconfig" config HIGHPTE diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From 066000dd856709b6980123eb39b957fe26993f7b Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 11 Jan 2010 15:51:04 -0800 Subject: Revert "x86, apic: Use logical flat on intel with <= 8 logical cpus" Revert commit 2fbd07a5f5d1295fa9b0c0564ec27da7c276a75a, as this commit breaks an IBM platform with quad-core Xeon cpu's. According to Suresh, this might be an IBM platform issue, as on other Intel platforms with <= 8 logical cpu's, logical flat mode works fine irespective of physical apic id values (inline with the xapic architecture). Revert this for now because of the IBM platform breakage. Another version will be re-submitted after the complete analysis. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Suresh Siddha Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 26 ++++++++++++++++++-------- arch/x86/kernel/apic/probe_64.c | 15 ++++----------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aa57c079c98f..e80f291472a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -62,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. * - * On AMD, this determines the messaging protocol we can use: if all APIC IDs + * This determines the messaging protocol we can use: if all APIC IDs * are in the 0 ... 7 range, then we can use logical addressing which * has some performance advantages (better broadcasting). * @@ -1898,14 +1898,24 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - def_to_bigsmp = 1; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: def_to_bigsmp = 1; + } } #endif diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c4cbd3080c1c..65edc180fc82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,23 +64,16 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif if (apic == &apic_flat) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - apic = &apic_physflat; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) - apic = &apic_physflat; - } + if (max_physical_apicid >= 8) + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - if (is_vsmp_box()) { /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; -- cgit v1.2.3 From c2c5d45d46c8c0fd34291dec958670ad4816796f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 03:52:25 +0100 Subject: perf: Stop stack frame walking off kernel addresses boundaries While processing kernel perf callchains, an bad entry can be considered as a valid stack pointer but not as a kernel address. In this case, we hang in an endless loop. This can happen in an x86-32 kernel after processing the last entry in a kernel stacktrace. Just stop the stack frame walking after we encounter an invalid kernel address. This fixes a hard lockup in x86-32. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262227945-27014-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c56bc2873030..6d817554780a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -123,13 +123,15 @@ print_context_stack_bp(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; - if (__kernel_text_address(addr)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); - } + if (!__kernel_text_address(addr)) + break; + + ops->address(data, addr, 1); + frame = frame->next_frame; + ret_addr = &frame->return_address; + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } + return (unsigned long)frame; } EXPORT_SYMBOL_GPL(print_context_stack_bp); -- cgit v1.2.3 From df39a2e48f99e2d706e8fa4dc99fd148eb59449d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 4 Jan 2010 16:17:21 +0000 Subject: x86: mce.h: Fix warning in header checks Someone isn't reading their build output: Move the definition out of the exported header. Signed-off-by: Alan Cox Cc: linux-kernel@vger.kernelorg Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 858baa061cfc..6c3fdd631ed3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,10 +108,11 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) -extern struct atomic_notifier_head x86_mce_decoder_chain; #ifdef __KERNEL__ +extern struct atomic_notifier_head x86_mce_decoder_chain; + #include #include #include -- cgit v1.2.3 From fcfbb2b5facd65efa7284cc315225bfe3d1856c2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 8 Jan 2010 12:13:54 -0800 Subject: x86: SGI UV: Fix mapping of MMIO registers This fixes the problem of the initialization code not correctly mapping the entire MMIO space on a UV system. A side effect is the map_high() interface needed to be changed to accommodate different address and size shifts. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: Cc: Jack Steiner Cc: Linus Torvalds LKML-Reference: <4B479202.7080705@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 5f92494dab61..b8bb869a6618 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -374,13 +374,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int shift, - int max_pnode, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int pshift, + int bshift, int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; - paddr = base << shift; - bytes = (1UL << shift) * (max_pnode + 1); + paddr = base << pshift; + bytes = (1UL << bshift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) @@ -396,7 +396,7 @@ static __init void map_gru_high(int max_pnode) gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, max_pnode, map_wb); + map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)gru.s.base << shift); gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); @@ -410,7 +410,7 @@ static __init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); + map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); } static __init void map_mmioh_high(int max_pnode) @@ -420,7 +420,8 @@ static __init void map_mmioh_high(int max_pnode) mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); + map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + max_pnode, map_uc); } static __init void map_low_mmrs(void) -- cgit v1.2.3 From 42590a75019a50012f25a962246498dead428433 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 4 Jan 2010 16:16:23 +0900 Subject: x86/agp: Fix agp_amd64_init and agp_amd64_cleanup This fixes the regression introduced by the commit f405d2c02395a74d3883bd03ded36457aa3697ad. The above commit fixes the following issue: http://marc.info/?l=linux-kernel&m=126192729110083&w=2 However, it doesn't work properly when you remove and insert the agp_amd64 module again. agp_amd64_init() and agp_amd64_cleanup should be called only when gart_iommu is not called earlier (that is, the GART IOMMU is not enabled). We need to use 'gart_iommu_aperture' to see if GART IOMMU is enabled or not. Signed-off-by: FUJITA Tomonori Cc: mitov@issp.bas.bg Cc: davej@redhat.com LKML-Reference: <20100104161603L.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 1 + drivers/char/agp/amd64-agp.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b25..f147a95fd84a 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -31,6 +31,7 @@ #include int gart_iommu_aperture; +EXPORT_SYMBOL_GPL(gart_iommu_aperture); int gart_iommu_aperture_disabled __initdata; int gart_iommu_aperture_allowed __initdata; diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 5aa7a586a7ff..1afb8968a342 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -725,12 +725,11 @@ static struct pci_driver agp_amd64_pci_driver = { int __init agp_amd64_init(void) { int err = 0; - static int done = 0; if (agp_off) return -EINVAL; - if (done++) + if (gart_iommu_aperture) return agp_bridges_found ? 0 : -ENODEV; err = pci_register_driver(&agp_amd64_pci_driver); @@ -771,6 +770,8 @@ int __init agp_amd64_init(void) static void __exit agp_amd64_cleanup(void) { + if (gart_iommu_aperture) + return; if (aperture_resource) release_resource(aperture_resource); pci_unregister_driver(&agp_amd64_pci_driver); -- cgit v1.2.3 From 864a0922dd128392467611d9857e5138c6a91999 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 13 Jan 2010 10:16:07 +0000 Subject: x86: kernel_thread() -- initialize SS to a known state Before the kernel_thread was converted into "C" we had pt_regs::ss set to __KERNEL_DS (by SAVE_ALL asm macro). Though I must admit I didn't find any *explicit* load of %ss from this structure the better to be on a safe side and set it to a known value. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Brian Gerst LKML-Reference: <1263377768-19600-1-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c6ee241c8a98..02c3ee013ccd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -288,6 +288,8 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; regs.gs = __KERNEL_STACK_CANARY; +#else + regs.ss = __KERNEL_DS; #endif regs.orig_ax = -1; -- cgit v1.2.3 From e68266b7001a4e29af083716f0c36c0d6dbb1b39 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 13 Jan 2010 10:16:08 +0000 Subject: x86: xen: 64-bit kernel RPL should be 0 Under Xen 64 bit guests actually run their kernel in ring 3, however the hypervisor takes care of squashing descriptor the RPLs transparently (in order to allow them to continue to differentiate between user and kernel space CS using the RPL). Therefore the Xen paravirt backend should use RPL==0 instead of 1 (or 3). Using RPL==1 causes generic arch code to take incorrect code paths because it uses "testl $3, , je foo" type tests for a userspace CS and this considers 1==userspace. This issue was previously masked because get_kernel_rpl() was omitted when setting CS in kernel_thread(). This was fixed when kernel_thread() was unified with 32 bit in f443ff4201dd25cd4dec183f9919ecba90c8edc2. Signed-off-by: Ian Campbell Cc: Christian Kujau Cc: Jeremy Fitzhardinge Cc: Cyrill Gorcunov Cc: Brian Gerst LKML-Reference: <1263377768-19600-2-git-send-email-ian.campbell@citrix.com> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2b26dd5930c6..36daccb68642 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1151,9 +1151,13 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ +#ifdef CONFIG_X86_32 pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; +#else + pv_info.kernel_rpl = 0; +#endif /* set the limit of our address space */ xen_reserve_top(); -- cgit v1.2.3 From 557a701c16553b0b691dbb64ef30361115a80f64 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 14 Dec 2009 11:44:15 +0100 Subject: [CPUFREQ] Fix use after free of struct powernow_k8_data Easy fix for a regression introduced in 2.6.31. On managed CPUs the cpufreq.c core will call driver->exit(cpu) on the managed cpus and powernow_k8 will free the core's data. Later driver->get(cpu) function might get called trying to read out the current freq of a managed cpu and the NULL pointer check does not work on the freed object -> better set it to NULL. ->get() is unsigned and must return 0 as invalid frequency. Reference: http://bugzilla.kernel.org/show_bug.cgi?id=14391 Signed-off-by: Thomas Renninger Tested-by: Michal Schmidt CC: stable@kernel.org Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df9441a9a2..2da4fa3bf6e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) kfree(data->powernow_table); kfree(data); + per_cpu(powernow_data, pol->cpu) = NULL; return 0; } @@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu) int err; if (!data) - return -EINVAL; + return 0; smp_call_function_single(cpu, query_values_on_cpu, &err, true); if (err) -- cgit v1.2.3 From 7a1110e861b2666ac09f5708d6fbe71d18ce64bb Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 12 Jan 2010 15:09:04 -0600 Subject: x86, uv: Add function retrieving node controller revision number Add function for determining the revision id of the SGI UV node controller chip (HUB). This function is needed in a subsequent patch. Signed-off-by: Jack Steiner LKML-Reference: <20100112210904.GA24546@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_hub.h | 12 ++++++++++++ arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bc54fa965af3..40be813fefb1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -495,5 +495,17 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } +/* + * Get the minimum revision number of the hub chips within the partition. + * 1 - initial rev 1.0 silicon + * 2 - rev 2.0 production silicon + */ +static inline int uv_get_min_hub_revision_id(void) +{ + extern int uv_min_hub_revision_id; + + return uv_min_hub_revision_id; +} + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_UV_UV_HUB_H */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b8bb869a6618..0e48de9ff864 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -36,6 +36,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +int uv_min_hub_revision_id; +EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); static inline bool is_GRU_range(u64 start, u64 end) { @@ -55,6 +57,10 @@ static int early_get_nodeid(void) mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); node_id.v = *mmr; early_iounmap(mmr, sizeof(*mmr)); + + /* Currently, all blades have same revision number */ + uv_min_hub_revision_id = node_id.s.revision; + return node_id.s.node_id; } -- cgit v1.2.3 From 1d2c867c941d635e53e8ad7bf37d060bb5b25ec5 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 15 Jan 2010 12:09:09 -0600 Subject: x86, uv: Ensure hub revision set for all ACPI modes. Ensure that UV hub revision is set for all ACPI modes. Signed-off-by: Russ Anderson LKML-Reference: <20100115180908.GB7757@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0e48de9ff864..21db3cbea7dc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -66,7 +66,10 @@ static int early_get_nodeid(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { + int nodeid; + if (!strcmp(oem_id, "SGI")) { + nodeid = early_get_nodeid(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; if (!strcmp(oem_table_id, "UVL")) uv_system_type = UV_LEGACY_APIC; @@ -74,7 +77,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); + nodeid << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3 From 0bb7a95f5455cd87e6a69e5818bc1f509a98d187 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 16 Jan 2010 10:39:30 +0100 Subject: hw-breakpoints, perf: Fix broken mmiotrace due to dr6 by reference change Commit 62edab9056a6cf0c9207339c8892c923a5217e45 (from June 2009 but merged in 2.6.33) changes notify_die to pass dr6 by reference. However, it forgets to fix the check for DR_STEP in kmmio.c, breaking mmiotrace. It also passes a wrong value to the post handler. This simple fix makes mmiotrace work again. Signed-off-by: Luca Barbieri Acked-by: K.Prasad Cc: Frederic Weisbecker LKML-Reference: <1263634770-14578-1-git-send-email-luca@luca-barbieri.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c0f6198565eb..536fb6823366 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -538,14 +538,15 @@ static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; + unsigned long* dr6_p = (unsigned long *)ERR_PTR(arg->err); - if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) { + if (val == DIE_DEBUG && (*dr6_p & DR_STEP)) + if (post_kmmio_handler(*dr6_p, arg->regs) == 1) { /* * Reset the BS bit in dr6 (pointed by args->err) to * denote completion of processing */ - (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; + *dr6_p &= ~DR_STEP; return NOTIFY_STOP; } -- cgit v1.2.3 From dfea91d5a7c795fd6f4e1a97489a98e4e767463e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:48 -0800 Subject: x86, apic: use physical mode for IBM summit platforms Chris McDermott from IBM confirmed that hurricane chipset in IBM summit platforms doesn't support logical flat mode. Irrespective of the other things like apic_id's, total number of logical cpu's, Linux kernel should default to physical mode for this system. The 32-bit kernel does so using the OEM checks for the IBM summit platform. Add a similar OEM platform check for the 64bit kernel too. Otherwise the linux kernel boot can hang on this platform under certain bios/platform settings. Signed-off-by: Suresh Siddha Tested-by: Ananth N Mavinakayanahalli Cc: Chris McDermott Cc: Yinghai Lu Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic_flat_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index eacbd2b31d27..e3c3d820c325 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; } + + if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { + printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); + return 1; + } #endif return 0; -- cgit v1.2.3 From bb668da6d6f2bec8a63838c098d9515eccb22cc4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:49 -0800 Subject: x86, apic: use logical flat for systems with <= 8 logical cpus We can use logical flat mode if there are <= 8 logical cpu's (irrespective of physical apic id values). This will enable simplified and efficient IPI and device interrupt routing on such platforms. This has been tested to work on both Intel and AMD platforms. Exceptions like IBM summit platform which can't use logical flat mode are addressed by using OEM platform checks. Signed-off-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Ananth N Mavinakayanahalli Cc: Chris McDermott Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 15 +-------------- arch/x86/kernel/apic/probe_64.c | 8 +++----- 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e80f291472a4..3987e4408f75 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. - * - * This determines the messaging protocol we can use: if all APIC IDs - * are in the 0 ... 7 range, then we can use logical addressing which - * has some performance advantages (better broadcasting). - * - * If there's an APIC ID above 8, we use physical addressing. */ unsigned int max_physical_apicid; @@ -1898,14 +1892,7 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { + if (num_processors > 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 65edc180fc82..450fe2064a14 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,15 +64,13 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif - if (apic == &apic_flat) { - if (max_physical_apicid >= 8) + if (apic == &apic_flat && num_processors > 8) apic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - } + + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); if (is_vsmp_box()) { /* need to update phys_pkg_id */ -- cgit v1.2.3 From b27d515a49169e5e2a92d621faac761074a8c5b1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Jan 2010 10:58:01 +0200 Subject: perf: x86: Add support for the ANY bit Propagate the ANY bit into the fixed counter config for v3 and higher. Signed-off-by: Stephane Eranian [a.p.zijlstra@chello.nl: split from larger patch] Signed-off-by: Peter Zijlstra LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f8548a870..1380367dabd9 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -19,6 +19,7 @@ #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d616c06e99b4..8c1c07073ccc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1343,6 +1343,13 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; + + /* + * ANY bit is supported in v3 and up + */ + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) + bits |= 0x4; + bits <<= (idx * 4); mask = 0xfULL << (idx * 4); -- cgit v1.2.3 From d91afd15b041f27d34859c79afa9e172018a86f4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:40:20 +0100 Subject: x86/amd-iommu: Fix possible integer overflow The variable i in this function could be increased to over 2**32 which would result in an integer overflow when using int. Fix it by changing i to unsigned long. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 23824fef789c..c2ccbd7b862f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -980,7 +980,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; struct amd_iommu *iommu; - int i; + unsigned long i; #ifdef CONFIG_IOMMU_STRESS populate = false; -- cgit v1.2.3 From 2ca762790caf822f7b61430fbaffa3ae4219977f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 16:45:31 +0100 Subject: x86/amd-iommu: Fix NULL pointer dereference in __detach_device() In the __detach_device function the reference count for a device-domain binding may become zero. This results in the device being removed from the domain and dev_data->domain will be NULL. This is bad because this pointer is dereferenced when trying to unlock the domain->lock. This patch fixes the issue by keeping the domain in a seperate variable. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c2ccbd7b862f..4478a48198a8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1489,11 +1489,14 @@ static void __detach_device(struct device *dev) { struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; + struct protection_domain *domain; unsigned long flags; BUG_ON(!dev_data->domain); - spin_lock_irqsave(&dev_data->domain->lock, flags); + domain = dev_data->domain; + + spin_lock_irqsave(&domain->lock, flags); if (dev_data->alias != dev) { alias_data = get_dev_data(dev_data->alias); @@ -1504,7 +1507,7 @@ static void __detach_device(struct device *dev) if (atomic_dec_and_test(&dev_data->bind)) do_detach(dev); - spin_unlock_irqrestore(&dev_data->domain->lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the -- cgit v1.2.3 From f5325094379158e6b876ea0010c807bf7890ec8f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:44:35 +0100 Subject: x86/amd-iommu: Fix IOMMU-API initialization for iommu=pt This patch moves the initialization of the iommu-api out of the dma-ops initialization code. This ensures that the iommu-api is initialized even with iommu=pt. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 1 + arch/x86/kernel/amd_iommu.c | 8 ++++++-- arch/x86/kernel/amd_iommu_init.c | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 4d817f9e6e77..d2544f1d705d 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -31,6 +31,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); +extern void amd_iommu_init_api(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4478a48198a8..751ce73c6e1b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2221,6 +2221,12 @@ static struct dma_map_ops amd_iommu_dma_ops = { /* * The function which clues the AMD IOMMU driver into dma_ops. */ + +void __init amd_iommu_init_api(void) +{ + register_iommu(&amd_iommu_ops); +} + int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; @@ -2256,8 +2262,6 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; - register_iommu(&amd_iommu_ops); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fb490ce7dd55..9dc91b431470 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1292,9 +1292,12 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_passthrough(); else ret = amd_iommu_init_dma_ops(); + if (ret) goto free; + amd_iommu_init_api(); + amd_iommu_init_notifier(); enable_iommus(); -- cgit v1.2.3 From d3ad9373b7c29b63d5e8460a69453718d200cc3b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Jan 2010 17:55:27 +0100 Subject: x86/amd-iommu: Fix deassignment of a device from the pt_domain Deassigning a device from the passthrough domain does not work and breaks device assignment to kvm guests. This patch fixes the issue. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 751ce73c6e1b..adb0ba025702 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1511,9 +1511,11 @@ static void __detach_device(struct device *dev) /* * If we run in passthrough mode the device must be assigned to the - * passthrough domain if it is detached from any other domain + * passthrough domain if it is detached from any other domain. + * Make sure we can deassign from the pt_domain itself. */ - if (iommu_pass_through && dev_data->domain == NULL) + if (iommu_pass_through && + (dev_data->domain == NULL && domain != pt_domain)) __attach_device(dev, pt_domain); } -- cgit v1.2.3 From 3a5fc0e40cb467e692737bc798bc99773c81e1e2 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 20 Jan 2010 12:10:47 -0800 Subject: x86: Set hotpluggable nodes in nodes_possible_map nodes_possible_map does not currently include nodes that have SRAT entries that are all ACPI_SRAT_MEM_HOT_PLUGGABLE since the bit is cleared in nodes_parsed if it does not have an online address range. Unequivocally setting the bit in nodes_parsed is insufficient since existing code, such as acpi_get_nodes(), assumes all nodes in the map have online address ranges. In fact, all code using nodes_parsed assumes such nodes represent an address range of online memory. nodes_possible_map is created by unioning nodes_parsed and cpu_nodes_parsed; the former represents nodes with online memory and the latter represents memoryless nodes. We now set the bit for hotpluggable nodes in cpu_nodes_parsed so that it also gets set in nodes_possible_map. [ hpa: Haicheng Li points out that this makes the naming of the variable cpu_nodes_parsed somewhat counterintuitive. However, leave it as is in the interest of keeping the pure bug fix patch small. ] Signed-off-by: David Rientjes Tested-by: Haicheng Li LKML-Reference: Cc: Signed-off-by: H. Peter Anvin --- arch/x86/mm/srat_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a27124185fc1..28c68762648f 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - if (changed) + if (changed) { + node_set(node, cpu_nodes_parsed); printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); + } } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -- cgit v1.2.3 From 73472a46b5b28116b145fb5fc05242c1aa8e1461 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 21 Jan 2010 11:09:52 -0800 Subject: x86: Disable HPET MSI on ATI SB700/SB800 HPET MSI on platforms with ATI SB700/SB800 as they seem to have some side-effects on floppy DMA. Do not use HPET MSI on such platforms. Original problem report from Mark Hounschell http://lkml.indiana.edu/hypermail/linux/kernel/0912.2/01118.html [ This patch needs to go to stable as well. But, there are some conflicts that prevents the patch from going as is. I can rebase/resubmit to stable once the patch goes upstream. hpa: still Cc:'ing stable@ as an FYI. ] Tested-by: Mark Hounschell Signed-off-by: Venkatesh Pallipadi Cc: LKML-Reference: <20100121190952.GA32523@linux-os.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hpet.h | 1 + arch/x86/kernel/hpet.c | 8 ++++++++ arch/x86/kernel/quirks.c | 13 +++++++++++++ 3 files changed, 22 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5d89fd2a3690..1d5c08a1bdfd 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -67,6 +67,7 @@ extern unsigned long hpet_address; extern unsigned long force_hpet_address; extern u8 hpet_blockid; extern int hpet_force_user; +extern u8 hpet_msi_disable; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ba6e65884603..ad80a1c718c6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -34,6 +34,8 @@ */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ +u8 hpet_msi_disable; + #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; #endif @@ -596,6 +598,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) unsigned int num_timers_used = 0; int i; + if (hpet_msi_disable) + return; + if (boot_cpu_has(X86_FEATURE_ARAT)) return; id = hpet_readl(HPET_ID); @@ -928,6 +933,9 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); hpet_print_config(); + if (hpet_msi_disable) + return 0; + if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 18093d7498f0..12e9feaa2f7a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -491,6 +491,19 @@ void force_hpet_resume(void) break; } } + +/* + * HPET MSI on some boards (ATI SB700/SB800) has side effect on + * floppy DMA. Disable HPET MSI on such platforms. + */ +static void force_disable_hpet_msi(struct pci_dev *unused) +{ + hpet_msi_disable = 1; +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, + force_disable_hpet_msi); + #endif #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) -- cgit v1.2.3 From 3b2e3d85aeb80769fb96c15ee4f6e14135328471 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 22 Jan 2010 21:34:56 +0100 Subject: Revert "x86: ucode-amd: Load ucode-patches once ..." Commit d1c84f79a6ba992dc01e312c44a21496303874d6 leads to a regression when microcode_amd.c is compiled into the kernel. It causes a big boot delay because the firmware is not available. See http://marc.info/?l=linux-kernel&m=126267290920060 It also renders the reload sysfs attribute useless. Fixing this is too intrusive for an -rc5 kernel. Thus I'd like to restore the microcode loading behaviour of kernel 2.6.32. CC: Gene Heskett Signed-off-by: Andreas Herrmann LKML-Reference: <20100122203456.GB13792@alberich.amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/microcode.h | 2 -- arch/x86/kernel/microcode_amd.c | 44 ++++++++++++---------------------------- arch/x86/kernel/microcode_core.c | 6 ------ 3 files changed, 13 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c24ca9a56458..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,8 +12,6 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { - void (*init)(struct device *device); - void (*fini)(void); enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 37542b67c57e..e1af7c055c7d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -36,9 +36,6 @@ MODULE_LICENSE("GPL v2"); #define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 #define UCODE_UCODE_TYPE 0x00000001 -const struct firmware *firmware; -static int supported_cpu; - struct equiv_cpu_entry { u32 installed_cpu; u32 fixed_errata_mask; @@ -77,12 +74,15 @@ static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { + struct cpuinfo_x86 *c = &cpu_data(cpu); u32 dummy; - if (!supported_cpu) - return -1; - memset(csig, 0, sizeof(*csig)); + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " + "supported\n", cpu, c->x86); + return -1; + } rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; @@ -294,10 +294,14 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) static enum ucode_state request_microcode_fw(int cpu, struct device *device) { + const char *fw_name = "amd-ucode/microcode_amd.bin"; + const struct firmware *firmware; enum ucode_state ret; - if (firmware == NULL) + if (request_firmware(&firmware, fw_name, device)) { + printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); return UCODE_NFOUND; + } if (*(u32 *)firmware->data != UCODE_MAGIC) { pr_err("invalid UCODE_MAGIC (0x%08x)\n", @@ -307,6 +311,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) ret = generic_load_microcode(cpu, firmware->data, firmware->size); + release_firmware(firmware); + return ret; } @@ -325,31 +331,7 @@ static void microcode_fini_cpu_amd(int cpu) uci->mc = NULL; } -void init_microcode_amd(struct device *device) -{ - const char *fw_name = "amd-ucode/microcode_amd.bin"; - struct cpuinfo_x86 *c = &boot_cpu_data; - - WARN_ON(c->x86_vendor != X86_VENDOR_AMD); - - if (c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); - return; - } - supported_cpu = 1; - - if (request_firmware(&firmware, fw_name, device)) - pr_err("failed to load file %s\n", fw_name); -} - -void fini_microcode_amd(void) -{ - release_firmware(firmware); -} - static struct microcode_ops microcode_amd_ops = { - .init = init_microcode_amd, - .fini = fini_microcode_amd, .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 0c8632433090..cceb5bc3c3c2 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -521,9 +521,6 @@ static int __init microcode_init(void) return PTR_ERR(microcode_pdev); } - if (microcode_ops->init) - microcode_ops->init(µcode_pdev->dev); - get_online_cpus(); mutex_lock(µcode_mutex); @@ -566,9 +563,6 @@ static void __exit microcode_exit(void) platform_device_unregister(microcode_pdev); - if (microcode_ops->fini) - microcode_ops->fini(); - microcode_ops = NULL; pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -- cgit v1.2.3 From b160091802d4a76dd063facb09fcf10bf5d5d747 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 23 Jan 2010 18:27:47 -0800 Subject: x86: Remove "x86 CPU features in debugfs" (CONFIG_X86_CPU_DEBUG) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_X86_CPU_DEBUG, which provides some parsed versions of the x86 CPU configuration via debugfs, has caused boot failures on real hardware. The value of this feature has been marginal at best, as all this information is already available to userspace via generic interfaces. Causes crashes that have not been fixed + minimal utility -> remove. See the referenced LKML thread for more information. Reported-by: Ozan ÇaÄŸlayan Signed-off-by: H. Peter Anvin LKML-Reference: Cc: Jaswinder Singh Rajput Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Yinghai Lu Cc: --- arch/x86/Kconfig | 6 - arch/x86/include/asm/cpu_debug.h | 127 -------- arch/x86/kernel/cpu/Makefile | 2 - arch/x86/kernel/cpu/cpu_debug.c | 688 --------------------------------------- 4 files changed, 823 deletions(-) delete mode 100644 arch/x86/include/asm/cpu_debug.h delete mode 100644 arch/x86/kernel/cpu/cpu_debug.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cbcbfdee3ee0..eb4092568f9e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -989,12 +989,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -config X86_CPU_DEBUG - tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" - ---help--- - If you select this option, this will provide various x86 CPUs - information through debugfs. - choice prompt "High Memory Support" default HIGHMEM4G if !X86_NUMAQ diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h deleted file mode 100644 index d96c1ee3a95c..000000000000 --- a/arch/x86/include/asm/cpu_debug.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef _ASM_X86_CPU_DEBUG_H -#define _ASM_X86_CPU_DEBUG_H - -/* - * CPU x86 architecture debug - * - * Copyright(C) 2009 Jaswinder Singh Rajput - */ - -/* Register flags */ -enum cpu_debug_bit { -/* Model Specific Registers (MSRs) */ - CPU_MC_BIT, /* Machine Check */ - CPU_MONITOR_BIT, /* Monitor */ - CPU_TIME_BIT, /* Time */ - CPU_PMC_BIT, /* Performance Monitor */ - CPU_PLATFORM_BIT, /* Platform */ - CPU_APIC_BIT, /* APIC */ - CPU_POWERON_BIT, /* Power-on */ - CPU_CONTROL_BIT, /* Control */ - CPU_FEATURES_BIT, /* Features control */ - CPU_LBRANCH_BIT, /* Last Branch */ - CPU_BIOS_BIT, /* BIOS */ - CPU_FREQ_BIT, /* Frequency */ - CPU_MTTR_BIT, /* MTRR */ - CPU_PERF_BIT, /* Performance */ - CPU_CACHE_BIT, /* Cache */ - CPU_SYSENTER_BIT, /* Sysenter */ - CPU_THERM_BIT, /* Thermal */ - CPU_MISC_BIT, /* Miscellaneous */ - CPU_DEBUG_BIT, /* Debug */ - CPU_PAT_BIT, /* PAT */ - CPU_VMX_BIT, /* VMX */ - CPU_CALL_BIT, /* System Call */ - CPU_BASE_BIT, /* BASE Address */ - CPU_VER_BIT, /* Version ID */ - CPU_CONF_BIT, /* Configuration */ - CPU_SMM_BIT, /* System mgmt mode */ - CPU_SVM_BIT, /*Secure Virtual Machine*/ - CPU_OSVM_BIT, /* OS-Visible Workaround*/ -/* Standard Registers */ - CPU_TSS_BIT, /* Task Stack Segment */ - CPU_CR_BIT, /* Control Registers */ - CPU_DT_BIT, /* Descriptor Table */ -/* End of Registers flags */ - CPU_REG_ALL_BIT, /* Select all Registers */ -}; - -#define CPU_REG_ALL (~0) /* Select all Registers */ - -#define CPU_MC (1 << CPU_MC_BIT) -#define CPU_MONITOR (1 << CPU_MONITOR_BIT) -#define CPU_TIME (1 << CPU_TIME_BIT) -#define CPU_PMC (1 << CPU_PMC_BIT) -#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) -#define CPU_APIC (1 << CPU_APIC_BIT) -#define CPU_POWERON (1 << CPU_POWERON_BIT) -#define CPU_CONTROL (1 << CPU_CONTROL_BIT) -#define CPU_FEATURES (1 << CPU_FEATURES_BIT) -#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) -#define CPU_BIOS (1 << CPU_BIOS_BIT) -#define CPU_FREQ (1 << CPU_FREQ_BIT) -#define CPU_MTRR (1 << CPU_MTTR_BIT) -#define CPU_PERF (1 << CPU_PERF_BIT) -#define CPU_CACHE (1 << CPU_CACHE_BIT) -#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) -#define CPU_THERM (1 << CPU_THERM_BIT) -#define CPU_MISC (1 << CPU_MISC_BIT) -#define CPU_DEBUG (1 << CPU_DEBUG_BIT) -#define CPU_PAT (1 << CPU_PAT_BIT) -#define CPU_VMX (1 << CPU_VMX_BIT) -#define CPU_CALL (1 << CPU_CALL_BIT) -#define CPU_BASE (1 << CPU_BASE_BIT) -#define CPU_VER (1 << CPU_VER_BIT) -#define CPU_CONF (1 << CPU_CONF_BIT) -#define CPU_SMM (1 << CPU_SMM_BIT) -#define CPU_SVM (1 << CPU_SVM_BIT) -#define CPU_OSVM (1 << CPU_OSVM_BIT) -#define CPU_TSS (1 << CPU_TSS_BIT) -#define CPU_CR (1 << CPU_CR_BIT) -#define CPU_DT (1 << CPU_DT_BIT) - -/* Register file flags */ -enum cpu_file_bit { - CPU_INDEX_BIT, /* index */ - CPU_VALUE_BIT, /* value */ -}; - -#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) - -#define MAX_CPU_FILES 512 - -struct cpu_private { - unsigned cpu; - unsigned type; - unsigned reg; - unsigned file; -}; - -struct cpu_debug_base { - char *name; /* Register name */ - unsigned flag; /* Register flag */ - unsigned write; /* Register write flag */ -}; - -/* - * Currently it looks similar to cpu_debug_base but once we add more files - * cpu_file_base will go in different direction - */ -struct cpu_file_base { - char *name; /* Register file name */ - unsigned flag; /* Register file flag */ - unsigned write; /* Register write flag */ -}; - -struct cpu_cpuX_base { - struct dentry *dentry; /* Register dentry */ - int init; /* Register index file */ -}; - -struct cpu_debug_range { - unsigned min; /* Register range min */ - unsigned max; /* Register range max */ - unsigned flag; /* Supported flags */ -}; - -#endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1d2cb383410e..c202b62f3671 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c deleted file mode 100644 index b368cd862997..000000000000 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ /dev/null @@ -1,688 +0,0 @@ -/* - * CPU x86 architecture debug code - * - * Copyright(C) 2009 Jaswinder Singh Rajput - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); -static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); -static DEFINE_PER_CPU(int, cpud_priv_count); - -static DEFINE_MUTEX(cpu_debug_lock); - -static struct dentry *cpu_debugfs_dir; - -static struct cpu_debug_base cpu_base[] = { - { "mc", CPU_MC, 0 }, - { "monitor", CPU_MONITOR, 0 }, - { "time", CPU_TIME, 0 }, - { "pmc", CPU_PMC, 1 }, - { "platform", CPU_PLATFORM, 0 }, - { "apic", CPU_APIC, 0 }, - { "poweron", CPU_POWERON, 0 }, - { "control", CPU_CONTROL, 0 }, - { "features", CPU_FEATURES, 0 }, - { "lastbranch", CPU_LBRANCH, 0 }, - { "bios", CPU_BIOS, 0 }, - { "freq", CPU_FREQ, 0 }, - { "mtrr", CPU_MTRR, 0 }, - { "perf", CPU_PERF, 0 }, - { "cache", CPU_CACHE, 0 }, - { "sysenter", CPU_SYSENTER, 0 }, - { "therm", CPU_THERM, 0 }, - { "misc", CPU_MISC, 0 }, - { "debug", CPU_DEBUG, 0 }, - { "pat", CPU_PAT, 0 }, - { "vmx", CPU_VMX, 0 }, - { "call", CPU_CALL, 0 }, - { "base", CPU_BASE, 0 }, - { "ver", CPU_VER, 0 }, - { "conf", CPU_CONF, 0 }, - { "smm", CPU_SMM, 0 }, - { "svm", CPU_SVM, 0 }, - { "osvm", CPU_OSVM, 0 }, - { "tss", CPU_TSS, 0 }, - { "cr", CPU_CR, 0 }, - { "dt", CPU_DT, 0 }, - { "registers", CPU_REG_ALL, 0 }, -}; - -static struct cpu_file_base cpu_file[] = { - { "index", CPU_REG_ALL, 0 }, - { "value", CPU_REG_ALL, 1 }, -}; - -/* CPU Registers Range */ -static struct cpu_debug_range cpu_reg_range[] = { - { 0x00000000, 0x00000001, CPU_MC, }, - { 0x00000006, 0x00000007, CPU_MONITOR, }, - { 0x00000010, 0x00000010, CPU_TIME, }, - { 0x00000011, 0x00000013, CPU_PMC, }, - { 0x00000017, 0x00000017, CPU_PLATFORM, }, - { 0x0000001B, 0x0000001B, CPU_APIC, }, - { 0x0000002A, 0x0000002B, CPU_POWERON, }, - { 0x0000002C, 0x0000002C, CPU_FREQ, }, - { 0x0000003A, 0x0000003A, CPU_CONTROL, }, - { 0x00000040, 0x00000047, CPU_LBRANCH, }, - { 0x00000060, 0x00000067, CPU_LBRANCH, }, - { 0x00000079, 0x00000079, CPU_BIOS, }, - { 0x00000088, 0x0000008A, CPU_CACHE, }, - { 0x0000008B, 0x0000008B, CPU_BIOS, }, - { 0x0000009B, 0x0000009B, CPU_MONITOR, }, - { 0x000000C1, 0x000000C4, CPU_PMC, }, - { 0x000000CD, 0x000000CD, CPU_FREQ, }, - { 0x000000E7, 0x000000E8, CPU_PERF, }, - { 0x000000FE, 0x000000FE, CPU_MTRR, }, - - { 0x00000116, 0x0000011E, CPU_CACHE, }, - { 0x00000174, 0x00000176, CPU_SYSENTER, }, - { 0x00000179, 0x0000017B, CPU_MC, }, - { 0x00000186, 0x00000189, CPU_PMC, }, - { 0x00000198, 0x00000199, CPU_PERF, }, - { 0x0000019A, 0x0000019A, CPU_TIME, }, - { 0x0000019B, 0x0000019D, CPU_THERM, }, - { 0x000001A0, 0x000001A0, CPU_MISC, }, - { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, - { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, }, - { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, }, - { 0x00000250, 0x00000250, CPU_MTRR, }, - { 0x00000258, 0x00000259, CPU_MTRR, }, - { 0x00000268, 0x0000026F, CPU_MTRR, }, - { 0x00000277, 0x00000277, CPU_PAT, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, }, - - { 0x00000300, 0x00000311, CPU_PMC, }, - { 0x00000345, 0x00000345, CPU_PMC, }, - { 0x00000360, 0x00000371, CPU_PMC, }, - { 0x0000038D, 0x00000390, CPU_PMC, }, - { 0x000003A0, 0x000003BE, CPU_PMC, }, - { 0x000003C0, 0x000003CD, CPU_PMC, }, - { 0x000003E0, 0x000003E1, CPU_PMC, }, - { 0x000003F0, 0x000003F2, CPU_PMC, }, - - { 0x00000400, 0x00000417, CPU_MC, }, - { 0x00000480, 0x0000048B, CPU_VMX, }, - - { 0x00000600, 0x00000600, CPU_DEBUG, }, - { 0x00000680, 0x0000068F, CPU_LBRANCH, }, - { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, - - { 0x000107CC, 0x000107D3, CPU_PMC, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, }, - { 0xC0000081, 0xC0000084, CPU_CALL, }, - { 0xC0000100, 0xC0000102, CPU_BASE, }, - { 0xC0000103, 0xC0000103, CPU_TIME, }, - - { 0xC0010000, 0xC0010007, CPU_PMC, }, - { 0xC0010010, 0xC0010010, CPU_CONF, }, - { 0xC0010015, 0xC0010015, CPU_CONF, }, - { 0xC0010016, 0xC001001A, CPU_MTRR, }, - { 0xC001001D, 0xC001001D, CPU_MTRR, }, - { 0xC001001F, 0xC001001F, CPU_CONF, }, - { 0xC0010030, 0xC0010035, CPU_BIOS, }, - { 0xC0010044, 0xC0010048, CPU_MC, }, - { 0xC0010050, 0xC0010056, CPU_SMM, }, - { 0xC0010058, 0xC0010058, CPU_CONF, }, - { 0xC0010060, 0xC0010060, CPU_CACHE, }, - { 0xC0010061, 0xC0010068, CPU_SMM, }, - { 0xC0010069, 0xC001006B, CPU_SMM, }, - { 0xC0010070, 0xC0010071, CPU_SMM, }, - { 0xC0010111, 0xC0010113, CPU_SMM, }, - { 0xC0010114, 0xC0010118, CPU_SVM, }, - { 0xC0010140, 0xC0010141, CPU_OSVM, }, - { 0xC0011022, 0xC0011023, CPU_CONF, }, -}; - -static int is_typeflag_valid(unsigned cpu, unsigned flag) -{ - int i; - - /* Standard Registers should be always valid */ - if (flag >= CPU_TSS) - return 1; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (cpu_reg_range[i].flag == flag) - return 1; - } - - /* Invalid */ - return 0; -} - -static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, - int index, unsigned flag) -{ - if (cpu_reg_range[index].flag == flag) { - *min = cpu_reg_range[index].min; - *max = cpu_reg_range[index].max; - } else - *max = 0; - - return *max; -} - -/* This function can also be called with seq = NULL for printk */ -static void print_cpu_data(struct seq_file *seq, unsigned type, - u32 low, u32 high) -{ - struct cpu_private *priv; - u64 val = high; - - if (seq) { - priv = seq->private; - if (priv->file) { - val = (val << 32) | low; - seq_printf(seq, "0x%llx\n", val); - } else - seq_printf(seq, " %08x: %08x_%08x\n", - type, high, low); - } else - printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); -} - -/* This function can also be called with seq = NULL for printk */ -static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) -{ - unsigned msr, msr_min, msr_max; - struct cpu_private *priv; - u32 low, high; - int i; - - if (seq) { - priv = seq->private; - if (priv->file) { - if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, - &low, &high)) - print_cpu_data(seq, priv->reg, low, high); - return; - } - } - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) - continue; - - for (msr = msr_min; msr <= msr_max; msr++) { - if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) - continue; - print_cpu_data(seq, msr, low, high); - } - } -} - -static void print_tss(void *arg) -{ - struct pt_regs *regs = task_pt_regs(current); - struct seq_file *seq = arg; - unsigned int seg; - - seq_printf(seq, " RAX\t: %016lx\n", regs->ax); - seq_printf(seq, " RBX\t: %016lx\n", regs->bx); - seq_printf(seq, " RCX\t: %016lx\n", regs->cx); - seq_printf(seq, " RDX\t: %016lx\n", regs->dx); - - seq_printf(seq, " RSI\t: %016lx\n", regs->si); - seq_printf(seq, " RDI\t: %016lx\n", regs->di); - seq_printf(seq, " RBP\t: %016lx\n", regs->bp); - seq_printf(seq, " ESP\t: %016lx\n", regs->sp); - -#ifdef CONFIG_X86_64 - seq_printf(seq, " R08\t: %016lx\n", regs->r8); - seq_printf(seq, " R09\t: %016lx\n", regs->r9); - seq_printf(seq, " R10\t: %016lx\n", regs->r10); - seq_printf(seq, " R11\t: %016lx\n", regs->r11); - seq_printf(seq, " R12\t: %016lx\n", regs->r12); - seq_printf(seq, " R13\t: %016lx\n", regs->r13); - seq_printf(seq, " R14\t: %016lx\n", regs->r14); - seq_printf(seq, " R15\t: %016lx\n", regs->r15); -#endif - - asm("movl %%cs,%0" : "=r" (seg)); - seq_printf(seq, " CS\t: %04x\n", seg); - asm("movl %%ds,%0" : "=r" (seg)); - seq_printf(seq, " DS\t: %04x\n", seg); - seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); - asm("movl %%es,%0" : "=r" (seg)); - seq_printf(seq, " ES\t: %04x\n", seg); - asm("movl %%fs,%0" : "=r" (seg)); - seq_printf(seq, " FS\t: %04x\n", seg); - asm("movl %%gs,%0" : "=r" (seg)); - seq_printf(seq, " GS\t: %04x\n", seg); - - seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); - - seq_printf(seq, " EIP\t: %016lx\n", regs->ip); -} - -static void print_cr(void *arg) -{ - struct seq_file *seq = arg; - - seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); - seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); - seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); - seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); -#ifdef CONFIG_X86_64 - seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); -#endif -} - -static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) -{ - seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); -} - -static void print_dt(void *seq) -{ - struct desc_ptr dt; - unsigned long ldt; - - /* IDT */ - store_idt((struct desc_ptr *)&dt); - print_desc_ptr("IDT", seq, dt); - - /* GDT */ - store_gdt((struct desc_ptr *)&dt); - print_desc_ptr("GDT", seq, dt); - - /* LDT */ - store_ldt(ldt); - seq_printf(seq, " LDT\t: %016lx\n", ldt); - - /* TR */ - store_tr(ldt); - seq_printf(seq, " TR\t: %016lx\n", ldt); -} - -static void print_dr(void *arg) -{ - struct seq_file *seq = arg; - unsigned long dr; - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - get_debugreg(dr, i); - seq_printf(seq, " dr%d\t: %016lx\n", i, dr); - } - - seq_printf(seq, "\n MSR\t:\n"); -} - -static void print_apic(void *arg) -{ - struct seq_file *seq = arg; - -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(seq, " LAPIC\t:\n"); - seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); - seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); - seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); - seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); - seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); - seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); - seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); - seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); - seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); - seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); - seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); - seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); - seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); - seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); - seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); - seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); - seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); - seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); - seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); - seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); - seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); - if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { - unsigned int i, v, maxeilvt; - - v = apic_read(APIC_EFEAT); - maxeilvt = (v >> 16) & 0xff; - seq_printf(seq, " EFEAT\t\t: %08x\n", v); - seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); - - for (i = 0; i < maxeilvt; i++) { - v = apic_read(APIC_EILVTn(i)); - seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); - } - } -#endif /* CONFIG_X86_LOCAL_APIC */ - seq_printf(seq, "\n MSR\t:\n"); -} - -static int cpu_seq_show(struct seq_file *seq, void *v) -{ - struct cpu_private *priv = seq->private; - - if (priv == NULL) - return -EINVAL; - - switch (cpu_base[priv->type].flag) { - case CPU_TSS: - smp_call_function_single(priv->cpu, print_tss, seq, 1); - break; - case CPU_CR: - smp_call_function_single(priv->cpu, print_cr, seq, 1); - break; - case CPU_DT: - smp_call_function_single(priv->cpu, print_dt, seq, 1); - break; - case CPU_DEBUG: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_dr, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - case CPU_APIC: - if (priv->file == CPU_INDEX_BIT) - smp_call_function_single(priv->cpu, print_apic, seq, 1); - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - - default: - print_msr(seq, priv->cpu, cpu_base[priv->type].flag); - break; - } - seq_printf(seq, "\n"); - - return 0; -} - -static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos == 0) /* One time is enough ;-) */ - return seq; - - return NULL; -} - -static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - - return cpu_seq_start(seq, pos); -} - -static void cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static const struct seq_operations cpu_seq_ops = { - .start = cpu_seq_start, - .next = cpu_seq_next, - .stop = cpu_seq_stop, - .show = cpu_seq_show, -}; - -static int cpu_seq_open(struct inode *inode, struct file *file) -{ - struct cpu_private *priv = inode->i_private; - struct seq_file *seq; - int err; - - err = seq_open(file, &cpu_seq_ops); - if (!err) { - seq = file->private_data; - seq->private = priv; - } - - return err; -} - -static int write_msr(struct cpu_private *priv, u64 val) -{ - u32 low, high; - - high = (val >> 32) & 0xffffffff; - low = val & 0xffffffff; - - if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) - return 0; - - return -EPERM; -} - -static int write_cpu_register(struct cpu_private *priv, const char *buf) -{ - int ret = -EPERM; - u64 val; - - ret = strict_strtoull(buf, 0, &val); - if (ret < 0) - return ret; - - /* Supporting only MSRs */ - if (priv->type < CPU_TSS_BIT) - return write_msr(priv, val); - - return ret; -} - -static ssize_t cpu_write(struct file *file, const char __user *ubuf, - size_t count, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct cpu_private *priv = seq->private; - char buf[19]; - - if ((priv == NULL) || (count >= sizeof(buf))) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, count)) - return -EFAULT; - - buf[count] = 0; - - if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) - if (!write_cpu_register(priv, buf)) - return count; - - return -EACCES; -} - -static const struct file_operations cpu_fops = { - .owner = THIS_MODULE, - .open = cpu_seq_open, - .read = seq_read, - .write = cpu_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, - unsigned file, struct dentry *dentry) -{ - struct cpu_private *priv = NULL; - - /* Already intialized */ - if (file == CPU_INDEX_BIT) - if (per_cpu(cpud_arr[type].init, cpu)) - return 0; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv == NULL) - return -ENOMEM; - - priv->cpu = cpu; - priv->type = type; - priv->reg = reg; - priv->file = file; - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_priv_arr[type], cpu) = priv; - per_cpu(cpud_priv_count, cpu)++; - mutex_unlock(&cpu_debug_lock); - - if (file) - debugfs_create_file(cpu_file[file].name, S_IRUGO, - dentry, (void *)priv, &cpu_fops); - else { - debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpud_arr[type].dentry, cpu), - (void *)priv, &cpu_fops); - mutex_lock(&cpu_debug_lock); - per_cpu(cpud_arr[type].init, cpu) = 1; - mutex_unlock(&cpu_debug_lock); - } - - return 0; -} - -static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, - struct dentry *dentry) -{ - unsigned file; - int err = 0; - - for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { - err = cpu_create_file(cpu, type, reg, file, dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned reg, reg_min, reg_max; - int i, err = 0; - char reg_dir[12]; - u32 low, high; - - for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { - if (!get_cpu_range(cpu, ®_min, ®_max, i, - cpu_base[type].flag)) - continue; - - for (reg = reg_min; reg <= reg_max; reg++) { - if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) - continue; - - sprintf(reg_dir, "0x%x", reg); - cpu_dentry = debugfs_create_dir(reg_dir, dentry); - err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); - if (err) - return err; - } - } - - return err; -} - -static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) -{ - struct dentry *cpu_dentry = NULL; - unsigned type; - int err = 0; - - for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { - if (!is_typeflag_valid(cpu, cpu_base[type].flag)) - continue; - cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; - - if (type < CPU_TSS_BIT) - err = cpu_init_msr(cpu, type, cpu_dentry); - else - err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, - cpu_dentry); - if (err) - return err; - } - - return err; -} - -static int cpu_init_cpu(void) -{ - struct dentry *cpu_dentry = NULL; - struct cpuinfo_x86 *cpui; - char cpu_dir[12]; - unsigned cpu; - int err = 0; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - cpui = &cpu_data(cpu); - if (!cpu_has(cpui, X86_FEATURE_MSR)) - continue; - - sprintf(cpu_dir, "cpu%d", cpu); - cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); - err = cpu_init_allreg(cpu, cpu_dentry); - - pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); - if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { - pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; - err = -ENFILE; - } - if (err) - return err; - } - - return err; -} - -static int __init cpu_debug_init(void) -{ - cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); - - return cpu_init_cpu(); -} - -static void __exit cpu_debug_exit(void) -{ - int i, cpu; - - if (cpu_debugfs_dir) - debugfs_remove_recursive(cpu_debugfs_dir); - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) - kfree(per_cpu(cpud_priv_arr[i], cpu)); -} - -module_init(cpu_debug_init); -module_exit(cpu_debug_exit); - -MODULE_AUTHOR("Jaswinder Singh Rajput"); -MODULE_DESCRIPTION("CPU Debug module"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From a5d36f82c4f3e852b61fdf1fee13463c8aa91b90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 29 Dec 2009 12:42:16 +0200 Subject: KVM: Fix race between APIC TMR and IRR When we queue an interrupt to the local apic, we set the IRR before the TMR. The vcpu can pick up the IRR and inject the interrupt before setting the TMR, and perhaps even EOI it, causing incorrect behaviour. The race is really insignificant since it can only occur on the first interrupt (usually following interrupts will not change TMR), but it's better closed than open. Fixed by reordering setting the TMR vs IRR. Cc: stable@kernel.org Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3063a0c4858b..ba8c045da782 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; + if (trig_mode) { + apic_debug("level trig mode for vector %d", vector); + apic_set_vector(vector, apic->regs + APIC_TMR); + } else + apic_clear_vector(vector, apic->regs + APIC_TMR); + result = !apic_test_and_set_irr(vector, apic); trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector, !result); @@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; } - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); kvm_vcpu_kick(vcpu); break; -- cgit v1.2.3 From 82b7005f0e72d8d1a8226e4c192cbb0850d10b3f Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 5 Jan 2010 19:02:28 +0800 Subject: KVM: x86: Fix host_mapping_level() When found a error hva, should not return PAGE_SIZE but the level... Also clean up the coding style of the following loop. Cc: stable@kernel.org Signed-off-by: Sheng Yang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4c3e5b2314cb..89a49fb46a27 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return page_size; + return PT_PAGE_TABLE_LEVEL; down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); @@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { - + for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) break; - } return level - 1; } -- cgit v1.2.3 From a6085fbaf65ab09bfb5ec8d902d6d21680fe1895 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 14 Jan 2010 17:41:27 -0200 Subject: KVM: MMU: bail out pagewalk on kvm_read_guest error Exit the guest pagetable walk loop if reading gpte failed. Otherwise its possible to enter an endless loop processing the previous present pte. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 58a0f1e88596..ede2131a9225 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -150,7 +150,9 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) + goto not_present; + trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) -- cgit v1.2.3 From 36cb93fd6b6bf7e9163a69a8bf20207aed5fea44 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:18:47 +0800 Subject: KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks vcpu->arch.mce_banks is malloc in kvm_arch_vcpu_init(), but never free in any place, this may cause memory leak. So this patch fixed to free it in kvm_arch_vcpu_uninit(). Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6651dbf58675..b265eecc741f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5088,6 +5088,7 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { + kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); down_read(&vcpu->kvm->slots_lock); kvm_mmu_destroy(vcpu); -- cgit v1.2.3 From 443c39bc9ef7d8f648408d74c97e943f3bb3f48a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Jan 2010 14:21:29 +0800 Subject: KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init() In function kvm_arch_vcpu_init(), if the memory malloc for vcpu->arch.mce_banks is fail, it does not free the memory of lapic date. This patch fixed it. Cc: stable@kernel.org Signed-off-by: Wei Yongjun Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b265eecc741f..1ddcad452add 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) GFP_KERNEL); if (!vcpu->arch.mce_banks) { r = -ENOMEM; - goto fail_mmu_destroy; + goto fail_free_lapic; } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; return 0; - +fail_free_lapic: + kvm_free_lapic(vcpu); fail_mmu_destroy: kvm_mmu_destroy(vcpu); fail_free_pio_data: -- cgit v1.2.3 From d8cc108f4fab42b380c6b3f3356f99e8dd5372e2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Jan 2010 11:25:36 -0600 Subject: oprofile/x86: fix crash when profiling more than 28 events With multiplexing enabled oprofile crashs when profiling more than 28 events. This patch fixes this. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cb88b1a0bd5f..76d4f566adee 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -222,7 +222,7 @@ static void nmi_cpu_switch(void *dummy) /* move to next set */ si += model->num_counters; - if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) per_cpu(switch_index, cpu) = 0; else per_cpu(switch_index, cpu) = si; -- cgit v1.2.3 From e83e452b0692c9c13372540deb88a77d4ae2553d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Jan 2010 23:26:27 +0100 Subject: oprofile/x86: add Xeon 7500 series support Add Xeon 7500 series support to oprofile. Straight forward: it's the same as Core i7, so just detect the model number. No user space changes needed. Signed-off-by: Andi Kleen Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 76d4f566adee..3347f696edc7 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -598,6 +598,7 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; + case 0x2e: case 26: spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; -- cgit v1.2.3 From da482474b8396e1a099c37ffc6541b78775aedb4 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Tue, 26 Jan 2010 20:37:22 -0600 Subject: x86, msr/cpuid: Pass the number of minors when unregistering MSR and CPUID drivers. Pass the number of minors when unregistering MSR and CPUID drivers. Reported-by: Dean Nelson Signed-off-by: Dean Nelson LKML-Reference: <20100127023722.GA22305@sgi.com> Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index cb27fd6136c9..83e5e628de73 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -229,7 +229,7 @@ static void __exit cpuid_exit(void) for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4bd93c9b2b27..206735ac8cbd 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -285,7 +285,7 @@ static void __exit msr_exit(void) for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); - unregister_chrdev(MSR_MAJOR, "cpu/msr"); + __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); } -- cgit v1.2.3 From aca3bb5910119d4cf6c28568a642582efb4cc14a Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 22 Jan 2010 09:41:40 -0600 Subject: x86, UV: Fix RTC latency bug by reading replicated cachelines For SGI UV node controllers (HUB) rev 2.0 or greater, use replicated cachelines to read the RTC timer. This optimization allows faster simulataneous reads from a given socket. Signed-off-by: Dimitri Sivanich Cc: Jack Steiner LKML-Reference: <20100122154140.GB4975@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_time.c | 13 ++++++++++++- drivers/char/uv_mmtimer.c | 18 +++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 3c84aa001c11..2b75ef638dbc 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -282,10 +282,21 @@ static int uv_rtc_unset_timer(int cpu, int force) /* * Read the RTC. + * + * Starting with HUB rev 2.0, the UV RTC register is replicated across all + * cachelines of it's own page. This allows faster simultaneous reads + * from a given socket. */ static cycle_t uv_read_rtc(struct clocksource *cs) { - return (cycle_t)uv_read_local_mmr(UVH_RTC); + unsigned long offset; + + if (uv_get_min_hub_revision_id() == 1) + offset = 0; + else + offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; + + return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); } /* diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c index 867b67be9f0a..c7072ba14f48 100644 --- a/drivers/char/uv_mmtimer.c +++ b/drivers/char/uv_mmtimer.c @@ -89,13 +89,17 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case MMTIMER_GETOFFSET: /* offset of the counter */ /* - * UV RTC register is on its own page + * Starting with HUB rev 2.0, the UV RTC register is + * replicated across all cachelines of it's own page. + * This allows faster simultaneous reads from a given socket. + * + * The offset returned is in 64 bit units. */ - if (PAGE_SIZE <= (1 << 16)) - ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) - / 8; + if (uv_get_min_hub_revision_id() == 1) + ret = 0; else - ret = -ENOSYS; + ret = ((uv_blade_processor_id() * L1_CACHE_BYTES) % + PAGE_SIZE) / 8; break; case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ @@ -115,8 +119,8 @@ static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); break; - case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ - ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; + case MMTIMER_MMAPAVAIL: + ret = 1; break; case MMTIMER_GETCOUNTER: -- cgit v1.2.3 From 35ea63d70f827a26c150993b4b940925bb02b03f Mon Sep 17 00:00:00 2001 From: Leann Ogasawara Date: Wed, 27 Jan 2010 15:29:18 -0800 Subject: x86: Add Dell OptiPlex 760 reboot quirk Dell OptiPlex 760 hangs on reboot unless reboot=bios is used. Add quirk to reboot through the BIOS. BugLink: https://bugs.launchpad.net/bugs/488319 Signed-off-by: Leann Ogasawara LKML-Reference: <1264634958.27335.1091.camel@emiko> Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1545bc0c9845..704bddcdf64d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0T656F"), }, }, + { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 760", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), + DMI_MATCH(DMI_BOARD_NAME, "0G919G"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", -- cgit v1.2.3 From e8e06eae4ffd683931b928f460c11c40cd3f7fd8 Mon Sep 17 00:00:00 2001 From: Jeff Garrett Date: Wed, 27 Jan 2010 22:02:26 -0600 Subject: x86/PCI: remove IOH range fetching Turned out to cause trouble on single IOH machines, and is superceded by _CRS on multi-IOH machines with production BIOSes. Signed-off-by: Jeff Garrett Signed-off-by: Jesse Barnes --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/intel_bus.c | 94 ------------------------------------------------ 2 files changed, 1 insertion(+), 95 deletions(-) delete mode 100644 arch/x86/pci/intel_bus.c (limited to 'arch/x86') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 564b008a51c7..39fba37f702f 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o -obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c deleted file mode 100644 index f81a2fa8fe25..000000000000 --- a/arch/x86/pci/intel_bus.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * to read io range from IOH pci conf, need to do it after mmconfig is there - */ - -#include -#include -#include -#include -#include - -#include "bus_numa.h" - -static inline void print_ioh_resources(struct pci_root_info *info) -{ - int res_num; - int busnum; - int i; - - printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", - info->bus_min, info->bus_max); - res_num = info->res_num; - busnum = info->bus_min; - for (i = 0; i < res_num; i++) { - struct resource *res; - - res = &info->res[i]; - printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", - busnum, i, - (res->flags & IORESOURCE_IO) ? "io port" : - "mmio", - res->start, res->end); - } -} - -#define IOH_LIO 0x108 -#define IOH_LMMIOL 0x10c -#define IOH_LMMIOH 0x110 -#define IOH_LMMIOH_BASEU 0x114 -#define IOH_LMMIOH_LIMITU 0x118 -#define IOH_LCFGBUS 0x11c - -static void __devinit pci_root_bus_res(struct pci_dev *dev) -{ - u16 word; - u32 dword; - struct pci_root_info *info; - u16 io_base, io_end; - u32 mmiol_base, mmiol_end; - u64 mmioh_base, mmioh_end; - int bus_base, bus_end; - - /* some sys doesn't get mmconf enabled */ - if (dev->cfg_size < 0x120) - return; - - if (pci_root_num >= PCI_ROOT_NR) { - printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); - return; - } - - info = &pci_root_info[pci_root_num]; - pci_root_num++; - - pci_read_config_word(dev, IOH_LCFGBUS, &word); - bus_base = (word & 0xff); - bus_end = (word & 0xff00) >> 8; - sprintf(info->name, "PCI Bus #%02x", bus_base); - info->bus_min = bus_base; - info->bus_max = bus_end; - - pci_read_config_word(dev, IOH_LIO, &word); - io_base = (word & 0xf0) << (12 - 4); - io_end = (word & 0xf000) | 0xfff; - update_res(info, io_base, io_end, IORESOURCE_IO, 0); - - pci_read_config_dword(dev, IOH_LMMIOL, &dword); - mmiol_base = (dword & 0xff00) << (24 - 8); - mmiol_end = (dword & 0xff000000) | 0xffffff; - update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); - - pci_read_config_dword(dev, IOH_LMMIOH, &dword); - mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); - mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); - pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); - mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; - pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); - mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; - update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); - - print_ioh_resources(info); -} - -/* intel IOH */ -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); -- cgit v1.2.3 From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jan 2010 22:14:42 -0800 Subject: Split 'flush_old_exec' into two functions 'flush_old_exec()' is the point of no return when doing an execve(), and it is pretty badly misnamed. It doesn't just flush the old executable environment, it also starts up the new one. Which is very inconvenient for things like setting up the new personality, because we want the new personality to affect the starting of the new environment, but at the same time we do _not_ want the new personality to take effect if flushing the old one fails. As a result, the x86-64 '32-bit' personality is actually done using this insane "I'm going to change the ABI, but I haven't done it yet" bit (TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the personality, but just the "pending" bit, so that "flush_thread()" can do the actual personality magic. This patch in no way changes any of that insanity, but it does split the 'flush_old_exec()' function up into a preparatory part that can fail (still called flush_old_exec()), and a new part that will actually set up the new exec environment (setup_new_exec()). All callers are changed to trivially comply with the new world order. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/sh/kernel/process_64.c | 2 +- arch/x86/ia32/ia32_aout.c | 10 ++++++---- fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 27 ++------------------------- fs/binfmt_elf_fdpic.c | 3 +++ fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/exec.c | 26 ++++++++++++++++---------- include/linux/binfmts.h | 1 + include/linux/sched.h | 2 +- 10 files changed, 33 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 31f80c61b031..ec79faf6f021 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -368,7 +368,7 @@ void exit_thread(void) void flush_thread(void) { - /* Called by fs/exec.c (flush_old_exec) to remove traces of a + /* Called by fs/exec.c (setup_new_exec) to remove traces of a * previously running executable. */ #ifdef CONFIG_SH_FPU if (last_task_used_math == current) { diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2a4d073d2cf1..435d2a5323da 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval) return retval; - regs->cs = __USER32_CS; - regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = - regs->r13 = regs->r14 = regs->r15 = 0; - /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); clear_thread_flag(TIF_ABI_PENDING); + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); current->mm->end_data = ex.a_data + diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 346b69405363..fdd397099172 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) #else set_personality(PER_LINUX); #endif + setup_new_exec(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index edd90c49003c..fd5b2ea5d299 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - /* - * The early SET_PERSONALITY here is so that the lookup - * for the interpreter happens in the namespace of the - * to-be-execed image. SET_PERSONALITY can select an - * alternate root. - * - * However, SET_PERSONALITY is NOT allowed to switch - * this task into the new images's memory mapping - * policy - that is, TASK_SIZE must still evaluate to - * that which is appropriate to the execing application. - * This is because exit_mmap() needs to have TASK_SIZE - * evaluate to the size of the old image. - * - * So if (say) a 64-bit application is execing a 32-bit - * application it is the architecture's responsibility - * to defer changing the value of TASK_SIZE until the - * switch really is going to happen - do this in - * flush_thread(). - akpm - */ - SET_PERSONALITY(loc->elf_ex); - interpreter = open_exec(elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) @@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* Verify the interpreter has a valid arch */ if (!elf_check_arch(&loc->interp_elf_ex)) goto out_free_dentry; - } else { - /* Executables without an interpreter also need a personality */ - SET_PERSONALITY(loc->elf_ex); } /* Flush all traces of the currently running executable */ @@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) current->flags |= PF_RANDOMIZE; - arch_pick_mmap_layout(current->mm); + + setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c57d9ce5ff7e..18d77297ccc8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; + + setup_new_exec(bprm); + set_binfmt(&elf_fdpic_format); current->mm->start_code = 0; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index d4a00ea1054c..42c6b4a54445 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); + setup_new_exec(bprm); } /* diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 2a9b5330cc5e..cc8560f6c9b0 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* OK, This is the point of no return */ current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; + setup_new_exec(bprm); /* Set the task size for HP-UX processes such that * the gateway page is outside the address space. diff --git a/fs/exec.c b/fs/exec.c index 632b02e34ec7..675c3f44c2ea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) int flush_old_exec(struct linux_binprm * bprm) { - char * name; - int i, ch, retval; - char tcomm[sizeof(current->comm)]; + int retval; /* * Make sure we have a private signal table and that @@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + return 0; + +out: + return retval; +} +EXPORT_SYMBOL(flush_old_exec); + +void setup_new_exec(struct linux_binprm * bprm) +{ + int i, ch; + char * name; + char tcomm[sizeof(current->comm)]; + + arch_pick_mmap_layout(current->mm); /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm) flush_signal_handlers(current, 0); flush_old_files(current->files); - - return 0; - -out: - return retval; } - -EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(setup_new_exec); /* * Prepare credentials and lock ->cred_guard_mutex. diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index cd4349bdc34e..89c6249fc561 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *); extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern void setup_new_exec(struct linux_binprm * bprm); extern int suid_dumpable; #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f7bba93929b..abdfacc58653 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1369,7 +1369,7 @@ struct task_struct { char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - - initialized normally by flush_old_exec */ + - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 05d43ed8a89c159ff641d472f970e3f1baa66318 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 28 Jan 2010 22:14:43 -0800 Subject: x86: get rid of the insane TIF_ABI_PENDING bit Now that the previous commit made it possible to do the personality setting at the point of no return, we do just that for ELF binaries. And suddenly all the reasons for that insane TIF_ABI_PENDING bit go away, and we can just make SET_PERSONALITY() just do the obvious thing for a 32-bit compat process. Everything becomes much more straightforward this way. Signed-off-by: H. Peter Anvin Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 1 - arch/x86/include/asm/elf.h | 10 ++-------- arch/x86/include/asm/thread_info.h | 2 -- arch/x86/kernel/process.c | 12 ------------ arch/x86/kernel/process_64.c | 11 +++++++++++ 5 files changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 435d2a5323da..f9f472462753 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -311,7 +311,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_ABI_PENDING); setup_new_exec(bprm); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index b4501ee223ad..1994d3f58443 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -181,14 +181,8 @@ do { \ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -#define COMPAT_SET_PERSONALITY(ex) \ -do { \ - if (test_thread_flag(TIF_IA32)) \ - clear_thread_flag(TIF_ABI_PENDING); \ - else \ - set_thread_flag(TIF_ABI_PENDING); \ - current->personality |= force_personality32; \ -} while (0) +void set_personality_ia32(void); +#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() #define COMPAT_ELF_PLATFORM ("i686") diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 375c917c37d2..e0d28901e969 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -87,7 +87,6 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ -#define TIF_ABI_PENDING 19 #define TIF_MEMDIE 20 #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -112,7 +111,6 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) -#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 02c3ee013ccd..c9b3522b6b46 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -115,18 +115,6 @@ void flush_thread(void) { struct task_struct *tsk = current; -#ifdef CONFIG_X86_64 - if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { - clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); - if (test_tsk_thread_flag(tsk, TIF_IA32)) { - clear_tsk_thread_flag(tsk, TIF_IA32); - } else { - set_tsk_thread_flag(tsk, TIF_IA32); - current_thread_info()->status |= TS_COMPAT; - } - } -#endif - flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f9e033150cdf..41a26a82470a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -521,6 +521,17 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } +void set_personality_ia32(void) +{ + /* inherit personality from parent */ + + /* Make sure to be in 32bit mode */ + set_thread_flag(TIF_IA32); + + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; +} + unsigned long get_wchan(struct task_struct *p) { unsigned long stack; -- cgit v1.2.3 From 7c099ce1575126395f186ecf58b51a60d5c3be7d Mon Sep 17 00:00:00 2001 From: David Härdeman Date: Thu, 28 Jan 2010 21:02:54 +0100 Subject: x86: Add quirk for Intel DG45FC board to avoid low memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6aa542a694dc9ea4344a8a590d2628c33d1b9431 added a quirk for the Intel DG45ID board due to low memory corruption. The Intel DG45FC shares the same BIOS (and the same bug) as noted in: http://bugzilla.kernel.org/show_bug.cgi?id=13736 Signed-off-by: David Härdeman LKML-Reference: <20100128200254.GA9134@hardeman.nu> Cc: Cc: Alexey Fisher Cc: ykzhao Cc: Tony Bones Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7b8b9894b22..5d9e40c58628 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -642,19 +642,27 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), }, }, - { /* - * AMI BIOS with low memory corruption was found on Intel DG45ID board. - * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will + * AMI BIOS with low memory corruption was found on Intel DG45ID and + * DG45FC boards. + * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will * match only DMI_BOARD_NAME and see if there is more bad products * with this vendor. */ + { .callback = dmi_low_memory_corruption, .ident = "AMI BIOS", .matches = { DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), + }, + }, #endif {} }; -- cgit v1.2.3 From cc0967490c1c3824bc5b75718b6ca8a51d9f2617 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:42 -0600 Subject: x86, hw_breakpoints, kgdb: Fix kgdb to use hw_breakpoint API In the 2.6.33 kernel, the hw_breakpoint API is now used for the performance event counters. The hw_breakpoint_handler() now consumes the hw breakpoints that were previously set by kgdb arch specific code. In order for kgdb to work in conjunction with this core API change, kgdb must use some of the low level functions of the hw_breakpoint API to install, uninstall, and deal with hw breakpoint reservations. The kgdb core required a change to call kgdb_disable_hw_debug anytime a slave cpu enters kgdb_wait() in order to keep all the hw breakpoints in sync as well as to prevent hitting a hw breakpoint while kgdb is active. During the architecture specific initialization of kgdb, it will pre-allocate 4 disabled (struct perf event **) structures. Kgdb will use these to manage the capabilities for the 4 hw breakpoint registers, per cpu. Right now the hw_breakpoint API does not have a way to ask how many breakpoints are available, on each CPU so it is possible that the install of a breakpoint might fail when kgdb restores the system to the run state. The intent of this patch is to first get the basic functionality of hw breakpoints working and leave it to the person debugging the kernel to understand what hw breakpoints are in use and what restrictions have been imposed as a result. Breakpoint constraints will be dealt with in a future patch. While atomic, the x86 specific kgdb code will call arch_uninstall_hw_breakpoint() and arch_install_hw_breakpoint() to manage the cpu specific hw breakpoints. The net result of these changes allow kgdb to use the same pool of hw_breakpoints that are used by the perf event API, but neither knows about future reservations for the available hw breakpoint slots. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-2-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 171 ++++++++++++++++++++++++++++++++----------------- kernel/kgdb.c | 3 + 2 files changed, 117 insertions(+), 57 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index dd74fe7273b1..62bea7307eaa 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -204,40 +205,38 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) static struct hw_breakpoint { unsigned enabled; - unsigned type; - unsigned len; unsigned long addr; + int len; + int type; + struct perf_event **pev; } breakinfo[4]; static void kgdb_correct_hw_break(void) { - unsigned long dr7; - int correctit = 0; - int breakbit; int breakno; - get_debugreg(dr7, 7); for (breakno = 0; breakno < 4; breakno++) { - breakbit = 2 << (breakno << 1); - if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { - correctit = 1; - dr7 |= breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - dr7 |= ((breakinfo[breakno].len << 2) | - breakinfo[breakno].type) << - ((breakno << 2) + 16); - set_debugreg(breakinfo[breakno].addr, breakno); - - } else { - if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { - correctit = 1; - dr7 &= ~breakbit; - dr7 &= ~(0xf0000 << (breakno << 2)); - } - } + struct perf_event *bp; + struct arch_hw_breakpoint *info; + int val; + int cpu = raw_smp_processor_id(); + if (!breakinfo[breakno].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); + info = counter_arch_bp(bp); + if (bp->attr.disabled != 1) + continue; + bp->attr.bp_addr = breakinfo[breakno].addr; + bp->attr.bp_len = breakinfo[breakno].len; + bp->attr.bp_type = breakinfo[breakno].type; + info->address = breakinfo[breakno].addr; + info->len = breakinfo[breakno].len; + info->type = breakinfo[breakno].type; + val = arch_install_hw_breakpoint(bp); + if (!val) + bp->attr.disabled = 0; } - if (correctit) - set_debugreg(dr7, 7); + hw_breakpoint_restore(); } static int @@ -259,15 +258,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) static void kgdb_remove_all_hw_break(void) { int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; - for (i = 0; i < 4; i++) - memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { - unsigned type; int i; for (i = 0; i < 4; i++) @@ -278,27 +285,38 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) switch (bptype) { case BP_HARDWARE_BREAKPOINT: - type = 0; - len = 1; + len = 1; + breakinfo[i].type = X86_BREAKPOINT_EXECUTE; break; case BP_WRITE_WATCHPOINT: - type = 1; + breakinfo[i].type = X86_BREAKPOINT_WRITE; break; case BP_ACCESS_WATCHPOINT: - type = 3; + breakinfo[i].type = X86_BREAKPOINT_RW; break; default: return -1; } - - if (len == 1 || len == 2 || len == 4) - breakinfo[i].len = len - 1; - else + switch (len) { + case 1: + breakinfo[i].len = X86_BREAKPOINT_LEN_1; + break; + case 2: + breakinfo[i].len = X86_BREAKPOINT_LEN_2; + break; + case 4: + breakinfo[i].len = X86_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case 8: + breakinfo[i].len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: return -1; - - breakinfo[i].enabled = 1; + } breakinfo[i].addr = addr; - breakinfo[i].type = type; + breakinfo[i].enabled = 1; return 0; } @@ -313,8 +331,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) */ void kgdb_disable_hw_debug(struct pt_regs *regs) { + int i; + int cpu = raw_smp_processor_id(); + struct perf_event *bp; + /* Disable hardware debugging while we are in kgdb: */ set_debugreg(0UL, 7); + for (i = 0; i < 4; i++) { + if (!breakinfo[i].enabled) + continue; + bp = *per_cpu_ptr(breakinfo[i].pev, cpu); + if (bp->attr.disabled == 1) + continue; + arch_uninstall_hw_breakpoint(bp); + bp->attr.disabled = 1; + } } /** @@ -378,7 +409,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, struct pt_regs *linux_regs) { unsigned long addr; - unsigned long dr6; char *ptr; int newPC; @@ -404,20 +434,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, raw_smp_processor_id()); } - get_debugreg(dr6, 6); - if (!(dr6 & 0x4000)) { - int breakno; - - for (breakno = 0; breakno < 4; breakno++) { - if (dr6 & (1 << breakno) && - breakinfo[breakno].type == 0) { - /* Set restore flag: */ - linux_regs->flags |= X86_EFLAGS_RF; - break; - } - } - } - set_debugreg(0UL, 6); kgdb_correct_hw_break(); return 0; @@ -485,8 +501,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) break; case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id()) { + if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) return single_step_cont(regs, args); break; @@ -539,7 +554,42 @@ static struct notifier_block kgdb_notifier = { */ int kgdb_arch_init(void) { - return register_die_notifier(&kgdb_notifier); + int i, cpu; + int ret; + struct perf_event_attr attr; + struct perf_event **pevent; + + ret = register_die_notifier(&kgdb_notifier); + if (ret != 0) + return ret; + /* + * Pre-allocate the hw breakpoint structions in the non-atomic + * portion of kgdb because this operation requires mutexs to + * complete. + */ + attr.bp_addr = (unsigned long)kgdb_arch_init; + attr.type = PERF_TYPE_BREAKPOINT; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + for (i = 0; i < 4; i++) { + breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); + if (IS_ERR(breakinfo[i].pev)) { + printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); + breakinfo[i].pev = NULL; + kgdb_arch_exit(); + return -1; + } + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[i].pev, cpu); + pevent[0]->hw.sample_period = 1; + if (pevent[0]->destroy != NULL) { + pevent[0]->destroy = NULL; + release_bp_slot(*pevent); + } + } + } + return ret; } /** @@ -550,6 +600,13 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { + int i; + for (i = 0; i < 4; i++) { + if (breakinfo[i].pev) { + unregister_wide_hw_breakpoint(breakinfo[i].pev); + breakinfo[i].pev = NULL; + } + } unregister_die_notifier(&kgdb_notifier); } diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 2eb517e23514..c7ade62e4ef0 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs) smp_wmb(); atomic_set(&cpu_in_kgdb[cpu], 1); + /* Disable any cpu specific hw breakpoints */ + kgdb_disable_hw_debug(regs); + /* Wait till primary CPU is done with debugging */ while (atomic_read(&passive_cpu_wait[cpu])) cpu_relax(); -- cgit v1.2.3 From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 28 Jan 2010 17:04:43 -0600 Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger This patch fixes the regression in functionality where the kernel debugger and the perf API do not nicely share hw breakpoint reservations. The kernel debugger cannot use any mutex_lock() calls because it can start the kernel running from an invalid context. A mutex free version of the reservation API needed to get created for the kernel debugger to safely update hw breakpoint reservations. The possibility for a breakpoint reservation to be concurrently processed at the time that kgdb interrupts the system is improbable. Should this corner case occur the end user is warned, and the kernel debugger will prohibit updating the hardware breakpoint reservations. Any time the kernel debugger reserves a hardware breakpoint it will be a system wide reservation. Signed-off-by: Jason Wessel Acked-by: Frederic Weisbecker Cc: kgdb-bugreport@lists.sourceforge.net Cc: K.Prasad Cc: Peter Zijlstra Cc: Alan Stern Cc: torvalds@linux-foundation.org LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kgdb.c | 51 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hw_breakpoint.h | 2 ++ kernel/hw_breakpoint.c | 52 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 62bea7307eaa..bfba6019d762 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void) hw_breakpoint_restore(); } +static int hw_break_reserve_slot(int breakno) +{ + int cpu; + int cnt = 0; + struct perf_event **pevent; + + for_each_online_cpu(cpu) { + cnt++; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_reserve_bp_slot(*pevent)) + goto fail; + } + + return 0; + +fail: + for_each_online_cpu(cpu) { + cnt--; + if (!cnt) + break; + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + dbg_release_bp_slot(*pevent); + } + return -1; +} + +static int hw_break_release_slot(int breakno) +{ + struct perf_event **pevent; + int cpu; + + for_each_online_cpu(cpu) { + pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); + if (dbg_release_bp_slot(*pevent)) + /* + * The debugger is responisble for handing the retry on + * remove failure. + */ + return -1; + } + return 0; +} + static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) { @@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) if (i == 4) return -1; + if (hw_break_release_slot(i)) { + printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); + return -1; + } breakinfo[i].enabled = 0; return 0; @@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) return -1; } breakinfo[i].addr = addr; + if (hw_break_reserve_slot(i)) { + breakinfo[i].addr = 0; + return -1; + } breakinfo[i].enabled = 1; return 0; diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 41235c93e4e9..070ba0621738 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern int dbg_reserve_bp_slot(struct perf_event *bp); +extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c030ae657f20..8a5c7d55ac9f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ -int reserve_bp_slot(struct perf_event *bp) +static int __reserve_bp_slot(struct perf_event *bp) { struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } + if (slots.pinned + (!!slots.flexible) == HBP_NUM) + return -ENOSPC; toggle_bp_slot(bp, true); -end: + return 0; +} + +int reserve_bp_slot(struct perf_event *bp) +{ + int ret; + + mutex_lock(&nr_bp_mutex); + + ret = __reserve_bp_slot(bp); + mutex_unlock(&nr_bp_mutex); return ret; } +static void __release_bp_slot(struct perf_event *bp) +{ + toggle_bp_slot(bp, false); +} + void release_bp_slot(struct perf_event *bp) { mutex_lock(&nr_bp_mutex); - toggle_bp_slot(bp, false); + __release_bp_slot(bp); mutex_unlock(&nr_bp_mutex); } +/* + * Allow the kernel debugger to reserve breakpoint slots without + * taking a lock using the dbg_* variant of for the reserve and + * release breakpoint slots. + */ +int dbg_reserve_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + return __reserve_bp_slot(bp); +} + +int dbg_release_bp_slot(struct perf_event *bp) +{ + if (mutex_is_locked(&nr_bp_mutex)) + return -1; + + __release_bp_slot(bp); + + return 0; +} int register_perf_hw_breakpoint(struct perf_event *bp) { -- cgit v1.2.3 From ab09809f2eee1dc2d8f8bea636e77d176ba6c648 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Feb 2010 14:38:12 -0800 Subject: x86, doc: Fix minor spelling error in arch/x86/mm/gup.c Fix minor spelling error in comment. No code change. Signed-off-by: Andy Shevchenko LKML-Reference: <201002022238.o12McDiF018720@imap1.linux-foundation.org> Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bca13cb..738e6593799d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) #else /* * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atoimcally, + * any locks. For this we would like to load the pointers atomically, * but that is not possible (without expensive cmpxchg8b) on PAE. What * we do have is the guarantee that a pte will only either go from not * present to present, or present to not present or both -- it will not -- cgit v1.2.3 From ea0854170c95245a258b386c7a9314399c949fe0 Mon Sep 17 00:00:00 2001 From: Shaohui Zheng Date: Tue, 2 Feb 2010 13:44:16 -0800 Subject: memory hotplug: fix a bug on /dev/mem for 64-bit kernels Newly added memory can not be accessed via /dev/mem, because we do not update the variables high_memory, max_pfn and max_low_pfn. Add a function update_end_of_memory_vars() to update these variables for 64-bit kernels. [akpm@linux-foundation.org: simplify comment] Signed-off-by: Shaohui Zheng Cc: Andi Kleen Cc: Li Haicheng Reviewed-by: Wu Fengguang Reviewed-by: KAMEZAWA Hiroyuki Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5198b9bb34ef..69ddfbd91135 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -49,6 +49,7 @@ #include #include #include +#include static unsigned long dma_reserve __initdata; @@ -615,6 +616,21 @@ void __init paging_init(void) * Memory hotplug specific functions */ #ifdef CONFIG_MEMORY_HOTPLUG +/* + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need + * updating. + */ +static void update_end_of_memory_vars(u64 start, u64 size) +{ + unsigned long end_pfn = PFN_UP(start + size); + + if (end_pfn > max_pfn) { + max_pfn = end_pfn; + max_low_pfn = end_pfn; + high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + } +} + /* * Memory is added always to NORMAL zone. This means you will never get * additional DMA/DMA32 memory. @@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size) ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + /* update max_pfn, max_low_pfn and high_memory */ + update_end_of_memory_vars(start, size); + return ret; } EXPORT_SYMBOL_GPL(arch_add_memory); -- cgit v1.2.3 From 923de3cf5bf12049628019010e36623fca5ef6d1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Jan 2010 19:13:49 +0800 Subject: kvmclock: count total_sleep_time when updating guest clock Current kvm wallclock does not consider the total_sleep_time which could cause wrong wallclock in guest after host suspend/resume. This patch solve this issue by counting total_sleep_time to get the correct host boot time. Cc: stable@kernel.org Signed-off-by: Jason Wang Acked-by: Glauber Costa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ddcad452add..a1e1bc9d412d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; struct pvclock_wall_clock wc; - struct timespec now, sys, boot; + struct timespec boot; if (!wall_clock) return; @@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. */ - now = current_kernel_time(); - ktime_get_ts(&sys); - boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + getboottime(&boot); wc.sec = boot.tv_sec; wc.nsec = boot.tv_nsec; @@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); ktime_get_ts(&ts); + monotonic_to_bootbased(&ts); local_irq_restore(flags); /* With all the info we got, fill in the values */ -- cgit v1.2.3 From ee73f656a604d5aa9df86a97102e4e462dd79924 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 29 Jan 2010 17:28:41 -0200 Subject: KVM: PIT: control word is write-only PIT control word (address 0x43) is write-only, reads are undefined. Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 296aba49472a..15578f180e59 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this, return -EOPNOTSUPP; addr &= KVM_PIT_CHANNEL_MASK; + if (addr == 3) + return 0; + s = &pit_state->channels[addr]; mutex_lock(&pit_state->lock); -- cgit v1.2.3 From cf9db6c41f739a294286847aab1e85f39aef1781 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 Feb 2010 20:35:02 -0600 Subject: x86-32: Make AT_VECTOR_SIZE_ARCH=2 Both x86-32 and x86-64 with 32-bit compat use ARCH_DLINFO_IA32, which defines two saved_auxv entries. But system.h only defines AT_VECTOR_SIZE_ARCH as 2 for CONFIG_IA32_EMULATION, not for CONFIG_X86_32. Fix that. Signed-off-by: Serge E. Hallyn LKML-Reference: <20100209023502.GA15408@us.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/system.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index ecb544e65382..e04740f7a0bb 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -11,9 +11,9 @@ #include /* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) # define AT_VECTOR_SIZE_ARCH 2 -#else +#else /* else it's non-compat x86-64 */ # define AT_VECTOR_SIZE_ARCH 1 #endif -- cgit v1.2.3 From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 9 Feb 2010 18:01:44 -0800 Subject: x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8 CPUs We need to fall back from logical-flat APIC mode to physical-flat mode when we have more than 8 CPUs. However, in the presence of CPU hotplug(with bios listing not enabled but possible cpus as disabled cpus in MADT), we have to consider the number of possible CPUs rather than the number of current CPUs; otherwise we may cross the 8-CPU boundary when CPUs are added later. 32bit apic code can use more cleanups (like the removal of vendor checks in 32bit default_setup_apic_routing()) and more unifications with 64bit code. Yinghai has some patches in works already. This patch addresses the boot issue that is reported in the virtualization guest context. [ hpa: incorporated function annotation feedback from Yinghai Lu ] Signed-off-by: Suresh Siddha LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com> Acked-by: Shaohui Zheng Reviewed-by: Yinghai Lu Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/boot.c | 5 ----- arch/x86/kernel/apic/apic.c | 17 ----------------- arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/probe_64.c | 2 +- arch/x86/kernel/mpparse.c | 7 ------- arch/x86/kernel/smpboot.c | 2 -- 6 files changed, 28 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 036d28adf59d..0acbcdfa5ca4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1185,9 +1185,6 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif /* * Parse MADT IO-APIC entries */ @@ -1197,8 +1194,6 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; - if (apic->setup_apic_routing) - apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3987e4408f75..dfca210f6a10 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void) #endif enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif verify_local_APIC(); connect_bsp_APIC(); @@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f6768c..99d2fe016084 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void default_setup_apic_routing(void) +void __init default_setup_apic_routing(void) +{ + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); +#endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); +} + +static void setup_apic_flat_routing(void) { #ifdef CONFIG_X86_IO_APIC printk(KERN_INFO @@ -103,7 +128,7 @@ struct apic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, + .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 450fe2064a14..83e9be4778e2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void) } #endif - if (apic == &apic_flat && num_processors > 8) + if (apic == &apic_flat && num_possible_cpus() > 8) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 40b54ceb68b5..a2c1edd2d3ac 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) x86_init.mpparse.mpc_record(1); } -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); - if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8c26f3..b4e870cbdc60 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1083,9 +1083,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); -- cgit v1.2.3 From 97c169d39b6846a564dc8d883832e7fef9bdb77d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 16 Feb 2010 03:30:06 -0500 Subject: ACPI: remove Asus P2B-DS from acpi=ht blacklist We realized when we broke acpi=ht http://bugzilla.kernel.org/show_bug.cgi?id=14886 that acpi=ht is not needed on this box and folks have been using acpi=force on it anyway. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdfa5ca4..af1c5833ff23 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1342,14 +1342,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), }, }, - { - .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, { .callback = force_acpi_ht, .ident = "ASUS CUR-DLS", -- cgit v1.2.3 From 1252f238db48ec419f40c1bdf30fda649860eed9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:02:13 +0100 Subject: x86: set_personality_ia32() misses force_personality32 05d43ed8a "x86: get rid of the insane TIF_ABI_PENDING bit" forgot about force_personality32. Fix. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 41a26a82470a..126f0b493d04 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -527,6 +527,7 @@ void set_personality_ia32(void) /* Make sure to be in 32bit mode */ set_thread_flag(TIF_IA32); + current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; -- cgit v1.2.3 From 11557b24fdec13cb1c3d5f681688401a651ed54e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:24:01 +0100 Subject: x86: ELF_PLAT_INIT() shouldn't worry about TIF_IA32 The 64-bit version of ELF_PLAT_INIT() clears TIF_IA32, but at this point it has already been cleared by SET_PERSONALITY == set_personality_64bit. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/include/asm/elf.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1994d3f58443..f2ad2163109d 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t, } #define ELF_PLAT_INIT(_r, load_addr) \ -do { \ - elf_common_init(¤t->thread, _r, 0); \ - clear_thread_flag(TIF_IA32); \ -} while (0) + elf_common_init(¤t->thread, _r, 0) #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -- cgit v1.2.3 From 84d710926797a6e317e7e94654a3ccd771cfd8a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Feb 2010 16:00:59 +0100 Subject: hw-breakpoints: Accept breakpoints on NULL address Before we had a generic breakpoint API, ptrace was accepting breakpoints on NULL address in x86. The new API refuse them, without given strong reasons. We need to follow the previous behaviour as some userspace apps like Wine need such NULL breakpoints to ensure old emulated software protections are still working. This fixes a 2.6.32 - 2.6.33-x ptrace regression. Reported-and-tested-by: Michael Stefaniuc Signed-off-by: Frederic Weisbecker Acked-by: K.Prasad Acked-by: Roland McGrath Cc: Alan Stern Cc: Maneesh Soni Cc: Alexandre Julliard Cc: Rafael J. Wysocki Cc: Maciej Rutecki --- arch/x86/kernel/hw_breakpoint.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 05d5fec64a94..bb6006e3e295 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -212,25 +212,6 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } -/* - * Store a breakpoint's encoded address, length, and type. - */ -static int arch_store_info(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* - * For kernel-addresses, either the address or symbol name can be - * specified. - */ - if (info->name) - info->address = (unsigned long) - kallsyms_lookup_name(info->name); - if (info->address) - return 0; - - return -EINVAL; -} - int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { @@ -362,10 +343,13 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, return ret; } - ret = arch_store_info(bp); - - if (ret < 0) - return ret; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); /* * Check that the low-order bits of the address are appropriate * for the alignment implied by len. -- cgit v1.2.3 From 326264a02448b0ac51f78f178b78e830aa077a0b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Feb 2010 18:24:18 +0100 Subject: hw-breakpoint: Keep track of dr7 local enable bits When the user enables breakpoints through dr7, he can choose between "local" or "global" enable bits but given how linux is implemented, both have the same effect. That said we don't keep track how the user enabled the breakpoints so when the user requests the dr7 value, we only translate the "enabled" status using the global enabled bits. It means that if the user enabled a breakpoint using the local enabled bit, reading back dr7 will set the global bit and clear the local one. Apps like Wine expect a full dr7 POKEUSER/PEEKUSER match for emulated softwares that implement old reverse engineering protection schemes. We fix that by keeping track of the whole dr7 value given by the user in the thread structure to drop this bug. We'll think about something more proper later. This fixes a 2.6.32 - 2.6.33-x ptrace regression. Reported-and-tested-by: Michael Stefaniuc Signed-off-by: Frederic Weisbecker Acked-by: K.Prasad Cc: Alan Stern Cc: Maneesh Soni Cc: Alexandre Julliard Cc: Rafael J. Wysocki Cc: Maciej Rutecki --- arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/ptrace.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fc801bab1b3b..b753ea59703a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -450,6 +450,8 @@ struct thread_struct { struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ unsigned long debugreg6; + /* Keep track of the exact dr7 value set by the user */ + unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 017d937639fe..0c1033d61e59 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -702,7 +702,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) } else if (n == 6) { val = thread->debugreg6; } else if (n == 7) { - val = ptrace_get_dr7(thread->ptrace_bps); + val = thread->ptrace_dr7; } return val; } @@ -778,8 +778,11 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return rc; } /* All that's left is DR7 */ - if (n == 7) + if (n == 7) { rc = ptrace_write_dr7(tsk, val); + if (!rc) + thread->ptrace_dr7 = val; + } ret_path: return rc; -- cgit v1.2.3