From 57b165948fe0e1cca78bca1fe1e1ae9a38baedd2 Mon Sep 17 00:00:00 2001 From: Michael D Labriola Date: Wed, 1 Feb 2012 10:05:00 -0500 Subject: x86/reboot: Reduce to a single DMI table for reboot quirks This commit reduces the X86_32 reboot_dmi_table and the X86_64 pci_reboot_dmi_table into a single table with a single set of functions (e.g., only 1 call to core_initcall). The table entries that use set_bios_reboot are grouped together inside a #define CONFIG_X86_32 block. Note that there's a single entry that uses set_kbd_reboot, which used to be available only on X86_32. This commit moves that entry outside the X86_32 block because it seems it never should have been in there. There's multiple places in reboot.c that assume KBD is valid regardless of X86_32/X86_64. Signed-off-by: Michael D Labriola Cc: Matthew Garrett Link: http://lkml.kernel.org/n/tip-lv3aliubas2l3aenq8v3uklk@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 176 ++++++++++++++++++++++------------------------- 1 file changed, 83 insertions(+), 93 deletions(-) (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d840e69a853c..e73973769076 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -150,6 +150,80 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +extern const unsigned char machine_real_restart_asm[]; +extern const u64 machine_real_restart_gdt[3]; + +void machine_real_restart(unsigned int type) +{ + void *restart_va; + unsigned long restart_pa; + void (*restart_lowmem)(unsigned int); + u64 *lowmem_gdt; + + local_irq_disable(); + + /* Write zero to CMOS register number 0x0f, which the BIOS POST + routine will recognize as telling it to do a proper reboot. (Well + that's what this book in front of me says -- it may only apply to + the Phoenix BIOS though, it's not clear). At the same time, + disable NMIs by setting the top bit in the CMOS address register, + as we're about to do peculiar things to the CPU. I'm not sure if + `outb_p' is needed instead of just `outb'. Use it to be on the + safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) + */ + spin_lock(&rtc_lock); + CMOS_WRITE(0x00, 0x8f); + spin_unlock(&rtc_lock); + + /* + * Switch back to the initial page table. + */ + load_cr3(initial_page_table); + + /* Write 0x1234 to absolute memory location 0x472. The BIOS reads + this on booting to tell it to "Bypass memory test (also warm + boot)". This seems like a fairly standard thing that gets set by + REBOOT.COM programs, and the previous reset routine did this + too. */ + *((unsigned short *)0x472) = reboot_mode; + + /* Patch the GDT in the low memory trampoline */ + lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); + + restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); + restart_pa = virt_to_phys(restart_va); + restart_lowmem = (void (*)(unsigned int))restart_pa; + + /* GDT[0]: GDT self-pointer */ + lowmem_gdt[0] = + (u64)(sizeof(machine_real_restart_gdt) - 1) + + ((u64)virt_to_phys(lowmem_gdt) << 16); + /* GDT[1]: 64K real mode code segment */ + lowmem_gdt[1] = + GDT_ENTRY(0x009b, restart_pa, 0xffff); + + /* Jump to the identity-mapped low memory code */ + restart_lowmem(type); +} +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(machine_real_restart); +#endif + +#endif /* CONFIG_X86_32 */ + +/* + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot + */ +static int __init set_pci_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_CF9) { + reboot_type = BOOT_CF9; + printk(KERN_INFO "%s series board detected. " + "Selecting PCI-method for reboots.\n", d->ident); + } + return 0; +} + static int __init set_kbd_reboot(const struct dmi_system_id *d) { if (reboot_type != BOOT_KBD) { @@ -159,7 +233,11 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) return 0; } +/* This is a single dmi_table handling all reboot quirks. Note that + * REBOOT_BIOS is only available for 32bit + */ static struct dmi_system_id __initdata reboot_dmi_table[] = { +#ifdef CONFIG_X86_32 { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, .ident = "Dell E520", @@ -309,6 +387,8 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, +#endif /* CONFIG_X86_32 */ + { /* Handle reboot issue on Acer Aspire one */ .callback = set_kbd_reboot, .ident = "Acer Aspire One A110", @@ -317,96 +397,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, - { } -}; - -static int __init reboot_init(void) -{ - /* Only do the DMI check if reboot_type hasn't been overridden - * on the command line - */ - if (reboot_default) { - dmi_check_system(reboot_dmi_table); - } - return 0; -} -core_initcall(reboot_init); - -extern const unsigned char machine_real_restart_asm[]; -extern const u64 machine_real_restart_gdt[3]; - -void machine_real_restart(unsigned int type) -{ - void *restart_va; - unsigned long restart_pa; - void (*restart_lowmem)(unsigned int); - u64 *lowmem_gdt; - - local_irq_disable(); - - /* Write zero to CMOS register number 0x0f, which the BIOS POST - routine will recognize as telling it to do a proper reboot. (Well - that's what this book in front of me says -- it may only apply to - the Phoenix BIOS though, it's not clear). At the same time, - disable NMIs by setting the top bit in the CMOS address register, - as we're about to do peculiar things to the CPU. I'm not sure if - `outb_p' is needed instead of just `outb'. Use it to be on the - safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) - */ - spin_lock(&rtc_lock); - CMOS_WRITE(0x00, 0x8f); - spin_unlock(&rtc_lock); - - /* - * Switch back to the initial page table. - */ - load_cr3(initial_page_table); - - /* Write 0x1234 to absolute memory location 0x472. The BIOS reads - this on booting to tell it to "Bypass memory test (also warm - boot)". This seems like a fairly standard thing that gets set by - REBOOT.COM programs, and the previous reset routine did this - too. */ - *((unsigned short *)0x472) = reboot_mode; - - /* Patch the GDT in the low memory trampoline */ - lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); - - restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); - restart_pa = virt_to_phys(restart_va); - restart_lowmem = (void (*)(unsigned int))restart_pa; - - /* GDT[0]: GDT self-pointer */ - lowmem_gdt[0] = - (u64)(sizeof(machine_real_restart_gdt) - 1) + - ((u64)virt_to_phys(lowmem_gdt) << 16); - /* GDT[1]: 64K real mode code segment */ - lowmem_gdt[1] = - GDT_ENTRY(0x009b, restart_pa, 0xffff); - - /* Jump to the identity-mapped low memory code */ - restart_lowmem(type); -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(machine_real_restart); -#endif - -#endif /* CONFIG_X86_32 */ - -/* - * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot - */ -static int __init set_pci_reboot(const struct dmi_system_id *d) -{ - if (reboot_type != BOOT_CF9) { - reboot_type = BOOT_CF9; - printk(KERN_INFO "%s series board detected. " - "Selecting PCI-method for reboots.\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { { /* Handle problems with rebooting on Apple MacBook5 */ .callback = set_pci_reboot, .ident = "Apple MacBook5", @@ -474,17 +464,17 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { { } }; -static int __init pci_reboot_init(void) +static int __init reboot_init(void) { /* Only do the DMI check if reboot_type hasn't been overridden * on the command line */ if (reboot_default) { - dmi_check_system(pci_reboot_dmi_table); + dmi_check_system(reboot_dmi_table); } return 0; } -core_initcall(pci_reboot_init); +core_initcall(reboot_init); static inline void kb_wait(void) { -- cgit v1.2.3 From 144d102b926f887d3d9f909b69a5c4f504ae0d40 Mon Sep 17 00:00:00 2001 From: Michael D Labriola Date: Wed, 1 Feb 2012 10:06:34 -0500 Subject: x86/reboot: Clean up coding style This commit simply cleans up the style used in this file to fall in line with what's specified in CodingStyle. Mostly comment changes, with a single removal of unneeded braces. Note that the comments for all the DMI quirks in reboot_dmi_table now all line up consistently using tabs instead of spaces. Signed-off-by: Michael D Labriola Link: http://lkml.kernel.org/n/tip-lde9yy7qsomh0sdqevn7xp56@git.kernel.org [ Fixed a few small details. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 129 +++++++++++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 59 deletions(-) (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e73973769076..77215c23fba1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -39,7 +39,8 @@ static int reboot_mode; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; -/* This variable is used privately to keep track of whether or not +/* + * This variable is used privately to keep track of whether or not * reboot_type is still set to its default value (i.e., reboot= hasn't * been set on the command line). This is needed so that we can * suppress DMI scanning for reboot quirks. Without it, it's @@ -51,7 +52,8 @@ static int reboot_default = 1; static int reboot_cpu = -1; #endif -/* This is set if we need to go through the 'emergency' path. +/* + * This is set if we need to go through the 'emergency' path. * When machine_emergency_restart() is called, we may be on * an inconsistent state and won't be able to do a clean cleanup */ @@ -60,22 +62,24 @@ static int reboot_emergency; /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; -/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] - warm Don't set the cold reboot flag - cold Set the cold reboot flag - bios Reboot by jumping through the BIOS (only for X86_32) - smp Reboot by executing reset on BSP or other CPU (only for X86_32) - triple Force a triple fault (init) - kbd Use the keyboard controller. cold reset (default) - acpi Use the RESET_REG in the FADT - efi Use efi reset_system runtime service - pci Use the so-called "PCI reset register", CF9 - force Avoid anything that could hang. +/* + * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] + * warm Don't set the cold reboot flag + * cold Set the cold reboot flag + * bios Reboot by jumping through the BIOS (only for X86_32) + * smp Reboot by executing reset on BSP or other CPU (only for X86_32) + * triple Force a triple fault (init) + * kbd Use the keyboard controller. cold reset (default) + * acpi Use the RESET_REG in the FADT + * efi Use efi reset_system runtime service + * pci Use the so-called "PCI reset register", CF9 + * force Avoid anything that could hang. */ static int __init reboot_setup(char *str) { for (;;) { - /* Having anything passed on the command line via + /* + * Having anything passed on the command line via * reboot= will cause us to disable DMI checking * below. */ @@ -98,9 +102,11 @@ static int __init reboot_setup(char *str) if (isdigit(*(str+2))) reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); } - /* we will leave sorting out the final value - when we are ready to reboot, since we might not - have detected BSP APIC ID or smp_num_cpu */ + /* + * We will leave sorting out the final value + * when we are ready to reboot, since we might not + * have detected BSP APIC ID or smp_num_cpu + */ break; #endif /* CONFIG_SMP */ @@ -162,14 +168,15 @@ void machine_real_restart(unsigned int type) local_irq_disable(); - /* Write zero to CMOS register number 0x0f, which the BIOS POST - routine will recognize as telling it to do a proper reboot. (Well - that's what this book in front of me says -- it may only apply to - the Phoenix BIOS though, it's not clear). At the same time, - disable NMIs by setting the top bit in the CMOS address register, - as we're about to do peculiar things to the CPU. I'm not sure if - `outb_p' is needed instead of just `outb'. Use it to be on the - safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) + /* + * Write zero to CMOS register number 0x0f, which the BIOS POST + * routine will recognize as telling it to do a proper reboot. (Well + * that's what this book in front of me says -- it may only apply to + * the Phoenix BIOS though, it's not clear). At the same time, + * disable NMIs by setting the top bit in the CMOS address register, + * as we're about to do peculiar things to the CPU. I'm not sure if + * `outb_p' is needed instead of just `outb'. Use it to be on the + * safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) */ spin_lock(&rtc_lock); CMOS_WRITE(0x00, 0x8f); @@ -180,11 +187,12 @@ void machine_real_restart(unsigned int type) */ load_cr3(initial_page_table); - /* Write 0x1234 to absolute memory location 0x472. The BIOS reads - this on booting to tell it to "Bypass memory test (also warm - boot)". This seems like a fairly standard thing that gets set by - REBOOT.COM programs, and the previous reset routine did this - too. */ + /* + * Write 0x1234 to absolute memory location 0x472. The BIOS reads + * this on booting to tell it to "Bypass memory test (also warm + * boot)". This seems like a fairly standard thing that gets set by + * REBOOT.COM programs, and the previous reset routine did this + * too. */ *((unsigned short *)0x472) = reboot_mode; /* Patch the GDT in the low memory trampoline */ @@ -233,7 +241,8 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) return 0; } -/* This is a single dmi_table handling all reboot quirks. Note that +/* + * This is a single dmi_table handling all reboot quirks. Note that * REBOOT_BIOS is only available for 32bit */ static struct dmi_system_id __initdata reboot_dmi_table[] = { @@ -262,7 +271,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), }, }, - { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ + { /* Handle problems with rebooting on Dell Optiplex 745's SFF */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 745", .matches = { @@ -270,7 +279,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), }, }, - { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ + { /* Handle problems with rebooting on Dell Optiplex 745's DFF */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 745", .matches = { @@ -279,7 +288,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0MM599"), }, }, - { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ + { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 745", .matches = { @@ -288,7 +297,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KW626"), }, }, - { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ + { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 330", .matches = { @@ -297,7 +306,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KP561"), }, }, - { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ + { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 360", .matches = { @@ -306,7 +315,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0T656F"), }, }, - { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ + { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */ .callback = set_bios_reboot, .ident = "Dell OptiPlex 760", .matches = { @@ -379,7 +388,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), }, }, - { /* Handle problems with rebooting on ASUS P4S800 */ + { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, .ident = "ASUS P4S800", .matches = { @@ -389,7 +398,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, #endif /* CONFIG_X86_32 */ - { /* Handle reboot issue on Acer Aspire one */ + { /* Handle reboot issue on Acer Aspire one */ .callback = set_kbd_reboot, .ident = "Acer Aspire One A110", .matches = { @@ -466,12 +475,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { static int __init reboot_init(void) { - /* Only do the DMI check if reboot_type hasn't been overridden + /* + * Only do the DMI check if reboot_type hasn't been overridden * on the command line */ - if (reboot_default) { + if (reboot_default) dmi_check_system(reboot_dmi_table); - } return 0; } core_initcall(reboot_init); @@ -492,14 +501,14 @@ static void vmxoff_nmi(int cpu, struct pt_regs *regs) cpu_emergency_vmxoff(); } -/* Use NMIs as IPIs to tell all CPUs to disable virtualization - */ +/* Use NMIs as IPIs to tell all CPUs to disable virtualization */ static void emergency_vmx_disable_all(void) { /* Just make sure we won't change CPUs while doing this */ local_irq_disable(); - /* We need to disable VMX on all CPUs before rebooting, otherwise + /* + * We need to disable VMX on all CPUs before rebooting, otherwise * we risk hanging up the machine, because the CPU ignore INIT * signals when VMX is enabled. * @@ -518,8 +527,7 @@ static void emergency_vmx_disable_all(void) * is still enabling VMX. */ if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. - */ + /* Disable VMX on this CPU. */ cpu_vmxoff(); /* Halt and disable VMX on the other CPUs */ @@ -564,12 +572,12 @@ static void native_machine_emergency_restart(void) /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { case BOOT_KBD: - mach_reboot_fixups(); /* for board specific fixups */ + mach_reboot_fixups(); /* For board specific fixups */ for (i = 0; i < 10; i++) { kb_wait(); udelay(50); - outb(0xfe, 0x64); /* pulse reset low */ + outb(0xfe, 0x64); /* Pulse reset low */ udelay(50); } if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { @@ -611,7 +619,7 @@ static void native_machine_emergency_restart(void) case BOOT_CF9: port_cf9_safe = true; - /* fall through */ + /* Fall through */ case BOOT_CF9_COND: if (port_cf9_safe) { @@ -649,7 +657,8 @@ void native_machine_shutdown(void) /* Make certain I only run on the appropriate processor */ set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); - /* O.K Now that I'm on the appropriate processor, + /* + * O.K Now that I'm on the appropriate processor, * stop all of the others. */ stop_other_cpus(); @@ -687,12 +696,11 @@ static void native_machine_restart(char *__unused) static void native_machine_halt(void) { - /* stop other cpus and apics */ + /* Stop other cpus and apics */ machine_shutdown(); tboot_shutdown(TB_SHUTDOWN_HALT); - /* stop this cpu */ stop_this_cpu(NULL); } @@ -703,7 +711,7 @@ static void native_machine_power_off(void) machine_shutdown(); pm_power_off(); } - /* a fallback in case there is no PM info available */ + /* A fallback in case there is no PM info available */ tboot_shutdown(TB_SHUTDOWN_HALT); } @@ -765,7 +773,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) cpu = raw_smp_processor_id(); - /* Don't do anything if this handler is invoked on crashing cpu. + /* + * Don't do anything if this handler is invoked on crashing cpu. * Otherwise, system will completely hang. Crashing cpu can get * an NMI if system was initially booted with nmi_watchdog parameter. */ @@ -789,7 +798,8 @@ static void smp_send_nmi_allbutself(void) apic->send_IPI_allbutself(NMI_VECTOR); } -/* Halt all other CPUs, calling the specified function on each of them +/* + * Halt all other CPUs, calling the specified function on each of them * * This function can be used to halt all other CPUs on crash * or emergency reboot time. The function passed as parameter @@ -800,7 +810,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) unsigned long msecs; local_irq_disable(); - /* Make a note of crashing cpu. Will be used in NMI callback.*/ + /* Make a note of crashing cpu. Will be used in NMI callback. */ crashing_cpu = safe_smp_processor_id(); shootdown_callback = callback; @@ -809,8 +819,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* Would it be better to replace the trap vector here? */ if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, NMI_FLAG_FIRST, "crash")) - return; /* return what? */ - /* Ensure the new callback function is set before sending + return; /* Return what? */ + /* + * Ensure the new callback function is set before sending * out the NMI */ wmb(); -- cgit v1.2.3 From 5a8c9aebe04a78b069828d364798d5f24c5a42bd Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:27 +0300 Subject: x86, realmode: Move reboot_32.S to unified realmode code Migrated reboot_32.S from x86_trampoline to the real-mode blob. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-5-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/realmode.h | 4 ++ arch/x86/kernel/Makefile | 1 - arch/x86/kernel/reboot.c | 25 +------- arch/x86/kernel/reboot_32.S | 135 --------------------------------------- arch/x86/realmode/rm/Makefile | 1 + arch/x86/realmode/rm/header.S | 3 + arch/x86/realmode/rm/reboot_32.S | 134 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 145 insertions(+), 158 deletions(-) delete mode 100644 arch/x86/kernel/reboot_32.S create mode 100644 arch/x86/realmode/rm/reboot_32.S (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index dc1bba534c14..bf26b0681931 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -9,6 +9,10 @@ struct real_mode_header { u32 text_start; u32 ro_end; u32 end; + /* reboot */ +#ifdef CONFIG_X86_32 + u32 machine_real_restart_asm; +#endif } __attribute__((__packed__)); extern struct real_mode_header real_mode_header; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f9e19d4eb984..b71ef35c7d77 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -49,7 +49,6 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ obj-y += reboot.o -obj-$(CONFIG_X86_32) += reboot_32.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d840e69a853c..050eff29a4bb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,6 +24,7 @@ #ifdef CONFIG_X86_32 # include # include +# include #else # include #endif @@ -332,15 +333,10 @@ static int __init reboot_init(void) } core_initcall(reboot_init); -extern const unsigned char machine_real_restart_asm[]; -extern const u64 machine_real_restart_gdt[3]; - void machine_real_restart(unsigned int type) { - void *restart_va; - unsigned long restart_pa; - void (*restart_lowmem)(unsigned int); - u64 *lowmem_gdt; + void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) + real_mode_header.machine_real_restart_asm; local_irq_disable(); @@ -369,21 +365,6 @@ void machine_real_restart(unsigned int type) too. */ *((unsigned short *)0x472) = reboot_mode; - /* Patch the GDT in the low memory trampoline */ - lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); - - restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); - restart_pa = virt_to_phys(restart_va); - restart_lowmem = (void (*)(unsigned int))restart_pa; - - /* GDT[0]: GDT self-pointer */ - lowmem_gdt[0] = - (u64)(sizeof(machine_real_restart_gdt) - 1) + - ((u64)virt_to_phys(lowmem_gdt) << 16); - /* GDT[1]: 64K real mode code segment */ - lowmem_gdt[1] = - GDT_ENTRY(0x009b, restart_pa, 0xffff); - /* Jump to the identity-mapped low memory code */ restart_lowmem(type); } diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S deleted file mode 100644 index 1d5c46df0d78..000000000000 --- a/arch/x86/kernel/reboot_32.S +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include - -/* - * The following code and data reboots the machine by switching to real - * mode and jumping to the BIOS reset entry point, as if the CPU has - * really been reset. The previous version asked the keyboard - * controller to pulse the CPU reset line, which is more thorough, but - * doesn't work with at least one type of 486 motherboard. It is easy - * to stop this code working; hence the copious comments. - * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. - */ - .section ".x86_trampoline","a" - .balign 16 - .code32 -ENTRY(machine_real_restart_asm) -r_base = . - /* Get our own relocated address */ - call 1f -1: popl %ebx - subl $(1b - r_base), %ebx - - /* Compute the equivalent real-mode segment */ - movl %ebx, %ecx - shrl $4, %ecx - - /* Patch post-real-mode segment jump */ - movw (dispatch_table - r_base)(%ebx,%eax,2),%ax - movw %ax, (101f - r_base)(%ebx) - movw %cx, (102f - r_base)(%ebx) - - /* Set up the IDT for real mode. */ - lidtl (machine_real_restart_idt - r_base)(%ebx) - - /* - * Set up a GDT from which we can load segment descriptors for real - * mode. The GDT is not used in real mode; it is just needed here to - * prepare the descriptors. - */ - lgdtl (machine_real_restart_gdt - r_base)(%ebx) - - /* - * Load the data segment registers with 16-bit compatible values - */ - movl $16, %ecx - movl %ecx, %ds - movl %ecx, %es - movl %ecx, %fs - movl %ecx, %gs - movl %ecx, %ss - ljmpl $8, $1f - r_base - -/* - * This is 16-bit protected mode code to disable paging and the cache, - * switch to real mode and jump to the BIOS reset code. - * - * The instruction that switches to real mode by writing to CR0 must be - * followed immediately by a far jump instruction, which set CS to a - * valid value for real mode, and flushes the prefetch queue to avoid - * running instructions that have already been decoded in protected - * mode. - * - * Clears all the flags except ET, especially PG (paging), PE - * (protected-mode enable) and TS (task switch for coprocessor state - * save). Flushes the TLB after paging has been disabled. Sets CD and - * NW, to disable the cache on a 486, and invalidates the cache. This - * is more like the state of a 486 after reset. I don't know if - * something else should be done for other chips. - * - * More could be done here to set up the registers as if a CPU reset had - * occurred; hopefully real BIOSs don't assume much. This is not the - * actual BIOS entry point, anyway (that is at 0xfffffff0). - * - * Most of this work is probably excessive, but it is what is tested. - */ - .code16 -1: - xorl %ecx, %ecx - movl %cr0, %eax - andl $0x00000011, %eax - orl $0x60000000, %eax - movl %eax, %cr0 - movl %ecx, %cr3 - movl %cr0, %edx - andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ - jz 2f - wbinvd -2: - andb $0x10, %al - movl %eax, %cr0 - .byte 0xea /* ljmpw */ -101: .word 0 /* Offset */ -102: .word 0 /* Segment */ - -bios: - ljmpw $0xf000, $0xfff0 - -apm: - movw $0x1000, %ax - movw %ax, %ss - movw $0xf000, %sp - movw $0x5307, %ax - movw $0x0001, %bx - movw $0x0003, %cx - int $0x15 - -END(machine_real_restart_asm) - - .balign 16 - /* These must match +#include +#include +#include + +/* + * The following code and data reboots the machine by switching to real + * mode and jumping to the BIOS reset entry point, as if the CPU has + * really been reset. The previous version asked the keyboard + * controller to pulse the CPU reset line, which is more thorough, but + * doesn't work with at least one type of 486 motherboard. It is easy + * to stop this code working; hence the copious comments. + * + * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. + */ + .section ".text32", "ax" + .code32 + .globl machine_real_restart_asm + + .balign 16 +machine_real_restart_asm: + /* Set up the IDT for real mode. */ + lidtl pa_machine_real_restart_idt + + /* + * Set up a GDT from which we can load segment descriptors for real + * mode. The GDT is not used in real mode; it is just needed here to + * prepare the descriptors. + */ + lgdtl pa_machine_real_restart_gdt + + /* + * Load the data segment registers with 16-bit compatible values + */ + movl $16, %ecx + movl %ecx, %ds + movl %ecx, %es + movl %ecx, %fs + movl %ecx, %gs + movl %ecx, %ss + ljmpw $8, $1f + +/* + * This is 16-bit protected mode code to disable paging and the cache, + * switch to real mode and jump to the BIOS reset code. + * + * The instruction that switches to real mode by writing to CR0 must be + * followed immediately by a far jump instruction, which set CS to a + * valid value for real mode, and flushes the prefetch queue to avoid + * running instructions that have already been decoded in protected + * mode. + * + * Clears all the flags except ET, especially PG (paging), PE + * (protected-mode enable) and TS (task switch for coprocessor state + * save). Flushes the TLB after paging has been disabled. Sets CD and + * NW, to disable the cache on a 486, and invalidates the cache. This + * is more like the state of a 486 after reset. I don't know if + * something else should be done for other chips. + * + * More could be done here to set up the registers as if a CPU reset had + * occurred; hopefully real BIOSs don't assume much. This is not the + * actual BIOS entry point, anyway (that is at 0xfffffff0). + * + * Most of this work is probably excessive, but it is what is tested. + */ + .text + .code16 + + .balign 16 +machine_real_restart_asm16: +1: + xorl %ecx, %ecx + movl %cr0, %edx + andl $0x00000011, %edx + orl $0x60000000, %edx + movl %edx, %cr0 + movl %ecx, %cr3 + movl %cr0, %edx + andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ + jz 2f + wbinvd +2: + andb $0x10, %dl + movl %edx, %cr0 + .byte 0xea /* ljmpw */ + .word 3f /* Offset */ + .word real_mode_seg /* Segment */ + +3: + testb $0, %al + jz bios + +apm: + movw $0x1000, %ax + movw %ax, %ss + movw $0xf000, %sp + movw $0x5307, %ax + movw $0x0001, %bx + movw $0x0003, %cx + int $0x15 + /* This should never return... */ + +bios: + ljmpw $0xf000, $0xfff0 + + .section ".rodata", "a" + .globl machine_real_restart_idt, machine_real_restart_gdt + + .balign 16 +machine_real_restart_idt: + .word 0xffff /* Length - real mode default value */ + .long 0 /* Base - real mode default value */ + + .balign 16 +machine_real_restart_gdt: + /* Self-pointer */ + .word 0xffff /* Length - real mode default value */ + .long pa_machine_real_restart_gdt + .word 0 + + /* + * 16-bit code segment pointing to real_mode_seg + * Selector value 8 + */ + .word 0xffff /* Limit */ + .long 0x9b000000 + pa_real_mode_base + .word 0 + + /* + * 16-bit data segment with the selector value 16 = 0x10 and + * base value 0x100; since this is consistent with real mode + * semantics we don't have to reload the segments once CR0.PE = 0. + */ + .quad GDT_ENTRY(0x0093, 0x100, 0xffff) -- cgit v1.2.3 From b429dbf6e866bd6dadb56fae66f61f611cde57ff Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:41 +0300 Subject: x86, realmode: don't copy real_mode_header Replaced copying of real_mode_header with a pointer to beginning of RM memory. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-19-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/realmode.h | 5 ++-- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/realmode.c | 57 ++++++++++++++++--------------------- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smpboot.c | 4 +-- arch/x86/kernel/tboot.c | 2 +- arch/x86/realmode/rm/header.S | 1 - arch/x86/realmode/rm/realmode.lds.S | 1 - arch/x86/realmode/rmpiggy.S | 2 ++ drivers/acpi/sleep.c | 2 +- 10 files changed, 35 insertions(+), 43 deletions(-) (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 1bfc74d213a4..d3ae49f4c3ef 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -8,7 +8,6 @@ struct real_mode_header { u32 text_start; u32 ro_end; - u32 end; /* reboot */ #ifdef CONFIG_X86_32 u32 machine_real_restart_asm; @@ -30,8 +29,8 @@ struct real_mode_header { #endif } __attribute__((__packed__)); -extern struct real_mode_header real_mode_header; -extern unsigned char *real_mode_base; +extern struct real_mode_header *real_mode_header; +extern unsigned char real_mode_blob_end[]; extern unsigned long init_rsp; extern unsigned long initial_code; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d941b62da4b6..6ca3f54ebe7d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -38,7 +38,7 @@ asmlinkage void acpi_enter_s3(void) int acpi_suspend_lowlevel(void) { struct wakeup_header *header = - (struct wakeup_header *) __va(real_mode_header.wakeup_header); + (struct wakeup_header *) __va(real_mode_header->wakeup_header); if (header->signature != WAKEUP_HEADER_SIGNATURE) { printk(KERN_ERR "wakeup header does not match\n"); diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index e7bf82a409bf..632c810ec8ea 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -5,8 +5,7 @@ #include #include -unsigned char *real_mode_base; -struct real_mode_header real_mode_header; +struct real_mode_header *real_mode_header; void __init setup_real_mode(void) { @@ -17,33 +16,32 @@ void __init setup_real_mode(void) u32 *ptr; u16 *seg; int i; + unsigned char *base; - struct real_mode_header *header = - (struct real_mode_header *) real_mode_blob; - - size_t size = PAGE_ALIGN(header->end); + size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); /* Has to be in very low memory so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); if (!mem) panic("Cannot allocate trampoline\n"); - real_mode_base = __va(mem); + base = __va(mem); memblock_reserve(mem, size); + real_mode_header = (struct real_mode_header *) base; printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - real_mode_base, (unsigned long long)mem, size); + base, (unsigned long long)mem, size); - memcpy(real_mode_base, real_mode_blob, size); + memcpy(base, real_mode_blob, size); - real_mode_seg = __pa(real_mode_base) >> 4; + real_mode_seg = __pa(base) >> 4; rel = (u32 *) real_mode_relocs; /* 16-bit segment relocations. */ count = rel[0]; rel = &rel[1]; for (i = 0; i < count; i++) { - seg = (u16 *) (real_mode_base + rel[i]); + seg = (u16 *) (base + rel[i]); *seg = real_mode_seg; } @@ -51,25 +49,21 @@ void __init setup_real_mode(void) count = rel[i]; rel = &rel[i + 1]; for (i = 0; i < count; i++) { - ptr = (u32 *) (real_mode_base + rel[i]); - *ptr += __pa(real_mode_base); + ptr = (u32 *) (base + rel[i]); + *ptr += __pa(base); } - /* Copied header will contain relocated physical addresses. */ - memcpy(&real_mode_header, real_mode_base, - sizeof(struct real_mode_header)); - #ifdef CONFIG_X86_32 - *((u32 *)__va(real_mode_header.startup_32_smp)) = __pa(startup_32_smp); - *((u32 *)__va(real_mode_header.boot_gdt)) = __pa(boot_gdt); + *((u32 *)__va(real_mode_header->startup_32_smp)) = __pa(startup_32_smp); + *((u32 *)__va(real_mode_header->boot_gdt)) = __pa(boot_gdt); #else - *((u64 *) __va(real_mode_header.startup_64_smp)) = + *((u64 *) __va(real_mode_header->startup_64_smp)) = (u64)secondary_startup_64; - *((u64 *) __va(real_mode_header.level3_ident_pgt)) = + *((u64 *) __va(real_mode_header->level3_ident_pgt)) = __pa(level3_ident_pgt) + _KERNPG_TABLE; - *((u64 *) __va(real_mode_header.level3_kernel_pgt)) = + *((u64 *) __va(real_mode_header->level3_kernel_pgt)) = __pa(level3_kernel_pgt) + _KERNPG_TABLE; #endif } @@ -82,23 +76,22 @@ void __init setup_real_mode(void) */ static int __init set_real_mode_permissions(void) { - size_t all_size = - PAGE_ALIGN(real_mode_header.end) - - __pa(real_mode_base); + unsigned char *base = (unsigned char *) real_mode_header; + size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); size_t ro_size = - PAGE_ALIGN(real_mode_header.ro_end) - - __pa(real_mode_base); + PAGE_ALIGN(real_mode_header->ro_end) - + __pa(base); size_t text_size = - PAGE_ALIGN(real_mode_header.ro_end) - - real_mode_header.text_start; + PAGE_ALIGN(real_mode_header->ro_end) - + real_mode_header->text_start; unsigned long text_start = - (unsigned long) __va(real_mode_header.text_start); + (unsigned long) __va(real_mode_header->text_start); - set_memory_nx((unsigned long) real_mode_base, all_size >> PAGE_SHIFT); - set_memory_ro((unsigned long) real_mode_base, ro_size >> PAGE_SHIFT); + set_memory_nx((unsigned long) base, size >> PAGE_SHIFT); + set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT); set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT); return 0; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 050eff29a4bb..658f856f09a3 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -336,7 +336,7 @@ core_initcall(reboot_init); void machine_real_restart(unsigned int type) { void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) - real_mode_header.machine_real_restart_asm; + real_mode_header->machine_real_restart_asm; local_irq_disable(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c7971ea74bd0..b8c0661e2341 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,9 +665,9 @@ static void __cpuinit announce_cpu(int cpu, int apicid) static int __cpuinit do_boot_cpu(int apicid, int cpu) { volatile u32 *trampoline_status = - (volatile u32 *) __va(real_mode_header.trampoline_status); + (volatile u32 *) __va(real_mode_header->trampoline_status); /* start_ip had better be page-aligned! */ - unsigned long start_ip = real_mode_header.trampoline_data; + unsigned long start_ip = real_mode_header->trampoline_data; unsigned long boot_error = 0; int timeout; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index c136e2325062..65adda4fde93 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -202,7 +202,7 @@ static int tboot_setup_sleep(void) } tboot->acpi_sinfo.kernel_s3_resume_vector = - real_mode_header.wakeup_start; + real_mode_header->wakeup_start; return 0; } diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index a91ec8f6b15f..c83005c4d455 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -12,7 +12,6 @@ GLOBAL(real_mode_header) .long pa_text_start .long pa_ro_end - .long pa_end #ifdef CONFIG_X86_32 .long pa_machine_real_restart_asm #endif diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S index 4d4afcaf5f02..86b2e8d6b1f1 100644 --- a/arch/x86/realmode/rm/realmode.lds.S +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -65,7 +65,6 @@ SECTIONS .signature : { *(.signature) } - pa_end = .; /DISCARD/ : { *(.note*) diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S index fd72a99d12ae..204c6ece0e97 100644 --- a/arch/x86/realmode/rmpiggy.S +++ b/arch/x86/realmode/rmpiggy.S @@ -13,6 +13,8 @@ GLOBAL(real_mode_blob) .incbin "arch/x86/realmode/rm/realmode.bin" END(real_mode_blob) +GLOBAL(real_mode_blob_end); + GLOBAL(real_mode_relocs) .incbin "arch/x86/realmode/rm/realmode.relocs" END(real_mode_relocs) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e77aa4a1c9f6..06139005c4dd 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -93,7 +93,7 @@ static struct notifier_block tts_notifier = { static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP - unsigned long wakeup_pa = real_mode_header.wakeup_start; + unsigned long wakeup_pa = real_mode_header->wakeup_start; /* do we have a wakeup address for S2 and S3? */ if (acpi_state == ACPI_STATE_S3) { if (!wakeup_pa) -- cgit v1.2.3 From 76eb9a30db4bc8fd172f9155247264b5f2686d7b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 20 Feb 2012 14:20:06 +0800 Subject: ACPI, x86: fix Dell M6600 ACPI reboot regression via DMI Dell Precision M6600 is known to require PCI reboot, so add it to the reboot blacklist in pci_reboot_dmi_table[]. https://bugzilla.kernel.org/show_bug.cgi?id=42749 cc: x86@kernel.org Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af81604..412db5716d91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -451,6 +451,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), }, }, + { /* Handle problems with rebooting on the Precision M6600. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), + }, + }, { } }; -- cgit v1.2.3 From 55c844a4dd16a4d1fdc0cf2a283ec631a02ec448 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 May 2012 23:15:41 +0800 Subject: x86/reboot: Fix a warning message triggered by stop_other_cpus() When rebooting our 24 CPU Westmere servers with 3.4-rc6, we always see this warning msg: Restarting system. machine restart ------------[ cut here ]------------ WARNING: at arch/x86/kernel/smp.c:125 native_smp_send_reschedule+0x74/0xa7() Hardware name: X8DTN Modules linked in: igb [last unloaded: scsi_wait_scan] Pid: 1, comm: systemd-shutdow Not tainted 3.4.0-rc6+ #22 Call Trace: [] warn_slowpath_common+0x7e/0x96 [] warn_slowpath_null+0x15/0x17 [] native_smp_send_reschedule+0x74/0xa7 [] trigger_load_balance+0x279/0x2a6 [] scheduler_tick+0xe0/0xe9 [] update_process_times+0x60/0x70 [] tick_sched_timer+0x68/0x92 [] __run_hrtimer+0xb3/0x13c [] ? tick_nohz_handler+0xd0/0xd0 [] hrtimer_interrupt+0xdb/0x198 [] smp_apic_timer_interrupt+0x81/0x94 [] apic_timer_interrupt+0x67/0x70 [] ? default_send_IPI_mask_allbutself_phys+0xb4/0xc4 [] physflat_send_IPI_allbutself+0x12/0x14 [] native_nmi_stop_other_cpus+0x8a/0xd6 [] native_machine_shutdown+0x50/0x67 [] machine_shutdown+0xa/0xc [] native_machine_restart+0x20/0x32 [] machine_restart+0xa/0xc [] kernel_restart+0x47/0x4c [] sys_reboot+0x13e/0x17c [] ? _raw_spin_unlock_bh+0x10/0x12 [] ? bdi_queue_work+0xcf/0xd8 [] ? __bdi_start_writeback+0xae/0xb7 [] ? iterate_supers+0xa3/0xb7 [] system_call_fastpath+0x16/0x1b ---[ end trace 320af5cb1cb60c5b ]--- The root cause seems to be the default_send_IPI_mask_allbutself_phys() takes quite some time (I measured it could be several ms) to complete sending NMIs to all the other 23 CPUs, and for HZ=250/1000 system, the time is long enough for a timer interrupt to happen, which will in turn trigger to kick load balance to a stopped CPU and cause this warning in native_smp_send_reschedule(). So disabling the local irq before stop_other_cpu() can fix this problem (tested 25 times reboot ok), and it is fine as there should be nobody caring the timer interrupt in such reboot stage. The latest 3.4 kernel slightly changes this behavior by sending REBOOT_VECTOR first and only send NMI_VECTOR if the REBOOT_VCTOR fails, and this patch is still needed to prevent the problem. Signed-off-by: Feng Tang Acked-by: Don Zickus Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120530231541.4c13433a@feng-i7 Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/reboot.c') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af81604..25b48edb847c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -639,9 +639,11 @@ void native_machine_shutdown(void) set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* - * O.K Now that I'm on the appropriate processor, - * stop all of the others. + * O.K Now that I'm on the appropriate processor, stop all of the + * others. Also disable the local irq to not receive the per-cpu + * timer interrupt which may trigger scheduler's load balance. */ + local_irq_disable(); stop_other_cpus(); #endif -- cgit v1.2.3