summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 23:59:56 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 23:59:56 +0300
commit9244724fbf8ab394a7210e8e93bf037abc859514 (patch)
tree95c2b9caf65ac531b6649247e99dc554e3bca96c /arch/x86/kernel/smpboot.c
parent7cffdbe3607a6cc2dc02d135e13732ec36bc4e28 (diff)
parentbf5a8c26ad7caf0772a1cd48c8a0924e48bdbaf0 (diff)
downloadlinux-9244724fbf8ab394a7210e8e93bf037abc859514.tar.xz
Merge tag 'smp-core-2023-06-26' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull SMP updates from Thomas Gleixner: "A large update for SMP management: - Parallel CPU bringup The reason why people are interested in parallel bringup is to shorten the (kexec) reboot time of cloud servers to reduce the downtime of the VM tenants. The current fully serialized bringup does the following per AP: 1) Prepare callbacks (allocate, intialize, create threads) 2) Kick the AP alive (e.g. INIT/SIPI on x86) 3) Wait for the AP to report alive state 4) Let the AP continue through the atomic bringup 5) Let the AP run the threaded bringup to full online state There are two significant delays: #3 The time for an AP to report alive state in start_secondary() on x86 has been measured in the range between 350us and 3.5ms depending on vendor and CPU type, BIOS microcode size etc. #4 The atomic bringup does the microcode update. This has been measured to take up to ~8ms on the primary threads depending on the microcode patch size to apply. On a two socket SKL server with 56 cores (112 threads) the boot CPU spends on current mainline about 800ms busy waiting for the APs to come up and apply microcode. That's more than 80% of the actual onlining procedure. This can be reduced significantly by splitting the bringup mechanism into two parts: 1) Run the prepare callbacks and kick the AP alive for each AP which needs to be brought up. The APs wake up, do their firmware initialization and run the low level kernel startup code including microcode loading in parallel up to the first synchronization point. (#1 and #2 above) 2) Run the rest of the bringup code strictly serialized per CPU (#3 - #5 above) as it's done today. Parallelizing that stage of the CPU bringup might be possible in theory, but it's questionable whether required surgery would be justified for a pretty small gain. If the system is large enough the first AP is already waiting at the first synchronization point when the boot CPU finished the wake-up of the last AP. That reduces the AP bringup time on that SKL from ~800ms to ~80ms, i.e. by a factor ~10x. The actual gain varies wildly depending on the system, CPU, microcode patch size and other factors. There are some opportunities to reduce the overhead further, but that needs some deep surgery in the x86 CPU bringup code. For now this is only enabled on x86, but the core functionality obviously works for all SMP capable architectures. - Enhancements for SMP function call tracing so it is possible to locate the scheduling and the actual execution points. That allows to measure IPI delivery time precisely" * tag 'smp-core-2023-06-26' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) trace,smp: Add tracepoints for scheduling remotelly called functions trace,smp: Add tracepoints around remotelly called functions MAINTAINERS: Add CPU HOTPLUG entry x86/smpboot: Fix the parallel bringup decision x86/realmode: Make stack lock work in trampoline_compat() x86/smp: Initialize cpu_primary_thread_mask late cpu/hotplug: Fix off by one in cpuhp_bringup_mask() x86/apic: Fix use of X{,2}APIC_ENABLE in asm with older binutils x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it x86/smpboot: Support parallel startup of secondary CPUs x86/smpboot: Implement a bit spinlock to protect the realmode stack x86/apic: Save the APIC virtual base address cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE x86/apic: Provide cpu_primary_thread mask x86/smpboot: Enable split CPU startup cpu/hotplug: Provide a split up CPUHP_BRINGUP mechanism cpu/hotplug: Reset task stack state in _cpu_up() cpu/hotplug: Remove unused state functions riscv: Switch to hotplug core state synchronization parisc: Switch to hotplug core state synchronization ...
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--arch/x86/kernel/smpboot.c524
1 files changed, 175 insertions, 349 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 352f0ce1ece4..8de80608fd01 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,6 +57,8 @@
#include <linux/pgtable.h>
#include <linux/overflow.h>
#include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
+#include <linux/mc146818rtc.h>
#include <asm/acpi.h>
#include <asm/cacheinfo.h>
@@ -74,7 +76,7 @@
#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
-#include <linux/mc146818rtc.h>
+#include <asm/microcode.h>
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
@@ -101,6 +103,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
+/* Representing CPUs for which sibling maps can be computed */
+static cpumask_var_t cpu_sibling_setup_mask;
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -121,7 +129,6 @@ int arch_update_cpu_topology(void)
return retval;
}
-
static unsigned int smpboot_warm_reset_vector_count;
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
@@ -154,66 +161,59 @@ static inline void smpboot_restore_warm_reset_vector(void)
}
-/*
- * Report back to the Boot Processor during boot time or to the caller processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
{
- int cpuid;
+ int cpuid = smp_processor_id();
/*
- * If waken up by an INIT in an 82489DX configuration
- * cpu_callout_mask guarantees we don't get here before
- * an INIT_deassert IPI reaches our local APIC, so it is
- * now safe to touch our local APIC.
- */
- cpuid = smp_processor_id();
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
+ * If woken up by an INIT in an 82489DX configuration the alive
+ * synchronization guarantees that the CPU does not reach this
+ * point before an INIT_deassert IPI reaches the local APIC, so it
+ * is now safe to touch the local APIC.
+ *
+ * Set up this CPU, first the APIC, which is probably redundant on
+ * most boards.
*/
apic_ap_setup();
- /*
- * Save our processor parameters. Note: this information
- * is needed for clock calibration.
- */
+ /* Save the processor parameters. */
smp_store_cpu_info(cpuid);
/*
* The topology information must be up to date before
- * calibrate_delay() and notify_cpu_starting().
+ * notify_cpu_starting().
*/
- set_cpu_sibling_map(raw_smp_processor_id());
+ set_cpu_sibling_map(cpuid);
ap_init_aperfmperf();
- /*
- * Get our bogomips.
- * Update loops_per_jiffy in cpu_data. Previous call to
- * smp_store_cpu_info() stored a value that is close but not as
- * accurate as the value just calculated.
- */
- calibrate_delay();
- cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
wmb();
+ /*
+ * This runs the AP through all the cpuhp states to its target
+ * state CPUHP_ONLINE.
+ */
notify_cpu_starting(cpuid);
+}
+static void ap_calibrate_delay(void)
+{
/*
- * Allow the master to continue.
+ * Calibrate the delay loop and update loops_per_jiffy in cpu_data.
+ * smp_store_cpu_info() stored a value that is close but not as
+ * accurate as the value just calculated.
+ *
+ * As this is invoked after the TSC synchronization check,
+ * calibrate_delay_is_known() will skip the calibration routine
+ * when TSC is synchronized across sockets.
*/
- cpumask_set_cpu(cpuid, cpu_callin_mask);
+ calibrate_delay();
+ cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
}
-static int cpu0_logical_apicid;
-static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
@@ -226,24 +226,63 @@ static void notrace start_secondary(void *unused)
*/
cr4_init();
-#ifdef CONFIG_X86_32
- /* switch away from the initial page table */
- load_cr3(swapper_pg_dir);
- __flush_tlb_all();
-#endif
- cpu_init_secondary();
+ /*
+ * 32-bit specific. 64-bit reaches this code with the correct page
+ * table established. Yet another historical divergence.
+ */
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ /* switch away from the initial page table */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+ }
+
+ cpu_init_exception_handling();
+
+ /*
+ * 32-bit systems load the microcode from the ASM startup code for
+ * historical reasons.
+ *
+ * On 64-bit systems load it before reaching the AP alive
+ * synchronization point below so it is not part of the full per
+ * CPU serialized bringup part when "parallel" bringup is enabled.
+ *
+ * That's even safe when hyperthreading is enabled in the CPU as
+ * the core code starts the primary threads first and leaves the
+ * secondary threads waiting for SIPI. Loading microcode on
+ * physical cores concurrently is a safe operation.
+ *
+ * This covers both the Intel specific issue that concurrent
+ * microcode loading on SMT siblings must be prohibited and the
+ * vendor independent issue`that microcode loading which changes
+ * CPUID, MSRs etc. must be strictly serialized to maintain
+ * software state correctness.
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ load_ucode_ap();
+
+ /*
+ * Synchronization point with the hotplug core. Sets this CPUs
+ * synchronization state to ALIVE and spin-waits for the control CPU to
+ * release this CPU for further bringup.
+ */
+ cpuhp_ap_sync_alive();
+
+ cpu_init();
+ fpu__init_cpu();
rcu_cpu_starting(raw_smp_processor_id());
x86_cpuinit.early_percpu_clock_init();
- smp_callin();
- enable_start_cpu0 = 0;
+ ap_starting();
+
+ /* Check TSC synchronization with the control CPU. */
+ check_tsc_sync_target();
- /* otherwise gcc will move up smp_processor_id before the cpu_init */
- barrier();
/*
- * Check TSC synchronization with the boot CPU:
+ * Calibrate the delay loop after the TSC synchronization check.
+ * This allows to skip the calibration when TSC is synchronized
+ * across sockets.
*/
- check_tsc_sync_target();
+ ap_calibrate_delay();
speculative_store_bypass_ht_init();
@@ -257,7 +296,6 @@ static void notrace start_secondary(void *unused)
set_cpu_online(smp_processor_id(), true);
lapic_online();
unlock_vector_lock();
- cpu_set_state_online(smp_processor_id());
x86_platform.nmi_init();
/* enable local interrupts */
@@ -270,15 +308,6 @@ static void notrace start_secondary(void *unused)
}
/**
- * topology_is_primary_thread - Check whether CPU is the primary SMT thread
- * @cpu: CPU to check
- */
-bool topology_is_primary_thread(unsigned int cpu)
-{
- return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
-}
-
-/**
* topology_smt_supported - Check whether SMT is supported by the CPUs
*/
bool topology_smt_supported(void)
@@ -288,6 +317,7 @@ bool topology_smt_supported(void)
/**
* topology_phys_to_logical_pkg - Map a physical package id to a logical
+ * @phys_pkg: The physical package id to map
*
* Returns logical package id or -1 if not found
*/
@@ -304,15 +334,17 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
/**
* topology_phys_to_logical_die - Map a physical die id to logical
+ * @die_id: The physical die id to map
+ * @cur_cpu: The CPU for which the mapping is done
*
* Returns logical die id or -1 if not found
*/
-int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
{
- int cpu;
- int proc_id = cpu_data(cur_cpu).phys_proc_id;
+ int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id;
for_each_possible_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -323,7 +355,6 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
}
return -1;
}
-EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
@@ -398,7 +429,7 @@ void smp_store_cpu_info(int id)
c->cpu_index = id;
/*
* During boot time, CPU0 has this setup already. Save the info when
- * bringing up AP or offlined CPU0.
+ * bringing up an AP.
*/
identify_secondary_cpu(c);
c->initialized = true;
@@ -706,9 +737,9 @@ static void impress_friends(void)
* Allow the user to impress friends.
*/
pr_debug("Before bogomips\n");
- for_each_possible_cpu(cpu)
- if (cpumask_test_cpu(cpu, cpu_callout_mask))
- bogosum += cpu_data(cpu).loops_per_jiffy;
+ for_each_online_cpu(cpu)
+ bogosum += cpu_data(cpu).loops_per_jiffy;
+
pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
num_online_cpus(),
bogosum/(500000/HZ),
@@ -795,51 +826,14 @@ static void __init smp_quirk_init_udelay(void)
}
/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
+ * Wake up AP by INIT, INIT, STARTUP sequence.
*/
-int
-wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
-{
- u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
- unsigned long send_status, accept_status = 0;
- int maxlvt;
-
- /* Target chip */
- /* Boot on the stack */
- /* Kick the second */
- apic_icr_write(APIC_DM_NMI | dm, apicid);
-
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- if (APIC_INTEGRATED(boot_cpu_apic_version)) {
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- }
- pr_debug("NMI sent\n");
-
- if (send_status)
- pr_err("APIC never delivered???\n");
- if (accept_status)
- pr_err("APIC delivery error (%lx)\n", accept_status);
-
- return (send_status | accept_status);
-}
-
-static int
-wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
int maxlvt, num_starts, j;
+ preempt_disable();
maxlvt = lapic_get_maxlvt();
/*
@@ -945,15 +939,16 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
if (accept_status)
pr_err("APIC delivery error (%lx)\n", accept_status);
+ preempt_enable();
return (send_status | accept_status);
}
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
+ static int width, node_width, first = 1;
static int current_node = NUMA_NO_NODE;
int node = early_cpu_to_node(cpu);
- static int width, node_width;
if (!width)
width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
@@ -961,10 +956,10 @@ static void announce_cpu(int cpu, int apicid)
if (!node_width)
node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
- if (cpu == 1)
- printk(KERN_INFO "x86: Booting SMP configuration:\n");
-
if (system_state < SYSTEM_RUNNING) {
+ if (first)
+ pr_info("x86: Booting SMP configuration:\n");
+
if (node != current_node) {
if (current_node > (-1))
pr_cont("\n");
@@ -975,77 +970,16 @@ static void announce_cpu(int cpu, int apicid)
}
/* Add padding for the BSP */
- if (cpu == 1)
+ if (first)
pr_cont("%*s", width + 1, " ");
+ first = 0;
pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
-
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
node, cpu, apicid);
}
-static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
-{
- int cpu;
-
- cpu = smp_processor_id();
- if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
- return NMI_HANDLED;
-
- return NMI_DONE;
-}
-
-/*
- * Wake up AP by INIT, INIT, STARTUP sequence.
- *
- * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
- * boot-strap code which is not a desired behavior for waking up BSP. To
- * void the boot-strap code, wake up CPU0 by NMI instead.
- *
- * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
- * (i.e. physically hot removed and then hot added), NMI won't wake it up.
- * We'll change this code in the future to wake up hard offlined CPU0 if
- * real platform and request are available.
- */
-static int
-wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
- int *cpu0_nmi_registered)
-{
- int id;
- int boot_error;
-
- preempt_disable();
-
- /*
- * Wake up AP by INIT, INIT, STARTUP sequence.
- */
- if (cpu) {
- boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
- goto out;
- }
-
- /*
- * Wake up BSP by nmi.
- *
- * Register a NMI handler to help wake up CPU0.
- */
- boot_error = register_nmi_handler(NMI_LOCAL,
- wakeup_cpu0_nmi, 0, "wake_cpu0");
-
- if (!boot_error) {
- enable_start_cpu0 = 1;
- *cpu0_nmi_registered = 1;
- id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
- boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
- }
-
-out:
- preempt_enable();
-
- return boot_error;
-}
-
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -1071,17 +1005,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from
+ * Returns zero if startup was successfully sent, else error code from
* ->wakeup_secondary_cpu.
*/
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
- int *cpu0_nmi_registered)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
- /* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start;
-
- unsigned long boot_error = 0;
- unsigned long timeout;
+ int ret;
#ifdef CONFIG_X86_64
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1094,7 +1024,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
if (IS_ENABLED(CONFIG_X86_32)) {
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_stack = idle->thread.sp;
- } else {
+ } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) {
smpboot_control = cpu;
}
@@ -1108,7 +1038,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
* This grunge runs the startup process for
* the targeted processor.
*/
-
if (x86_platform.legacy.warm_reset) {
pr_debug("Setting warm reset code and vector.\n");
@@ -1123,13 +1052,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
}
}
- /*
- * AP might wait on cpu_callout_mask in cpu_init() with
- * cpu_initialized_mask set if previous attempt to online
- * it timed-out. Clear cpu_initialized_mask so that after
- * INIT/SIPI it could start with a clean state.
- */
- cpumask_clear_cpu(cpu, cpu_initialized_mask);
smp_mb();
/*
@@ -1137,66 +1059,25 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
* - Use a method from the APIC driver if one defined, with wakeup
* straight to 64-bit mode preferred over wakeup to RM.
* Otherwise,
- * - Use an INIT boot APIC message for APs or NMI for BSP.
+ * - Use an INIT boot APIC message
*/
if (apic->wakeup_secondary_cpu_64)
- boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+ ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
else if (apic->wakeup_secondary_cpu)
- boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+ ret = apic->wakeup_secondary_cpu(apicid, start_ip);
else
- boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
- cpu0_nmi_registered);
-
- if (!boot_error) {
- /*
- * Wait 10s total for first sign of life from AP
- */
- boot_error = -1;
- timeout = jiffies + 10*HZ;
- while (time_before(jiffies, timeout)) {
- if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
- /*
- * Tell AP to proceed with initialization
- */
- cpumask_set_cpu(cpu, cpu_callout_mask);
- boot_error = 0;
- break;
- }
- schedule();
- }
- }
-
- if (!boot_error) {
- /*
- * Wait till AP completes initial initialization
- */
- while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /*
- * Allow other tasks to run while we wait for the
- * AP to come online. This also gives a chance
- * for the MTRR work(triggered by the AP coming online)
- * to be completed in the stop machine context.
- */
- schedule();
- }
- }
-
- if (x86_platform.legacy.warm_reset) {
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
- }
+ ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
- return boot_error;
+ /* If the wakeup mechanism failed, cleanup the warm reset vector */
+ if (ret)
+ arch_cpuhp_cleanup_kick_cpu(cpu);
+ return ret;
}
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
- int cpu0_nmi_registered = 0;
- unsigned long flags;
- int err, ret = 0;
+ int err;
lockdep_assert_irqs_enabled();
@@ -1210,24 +1091,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
}
/*
- * Already booted CPU?
- */
- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- pr_debug("do_boot_cpu %d Already started\n", cpu);
- return -ENOSYS;
- }
-
- /*
* Save current MTRR state in case it was changed since early boot
* (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
*/
mtrr_save_state();
- /* x86 CPUs take themselves offline, so delayed offline is OK. */
- err = cpu_check_up_prepare(cpu);
- if (err && err != -EBUSY)
- return err;
-
/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
@@ -1235,41 +1103,44 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
if (err)
return err;
- err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
- if (err) {
+ err = do_boot_cpu(apicid, cpu, tidle);
+ if (err)
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
- ret = -EIO;
- goto unreg_nmi;
- }
- /*
- * Check TSC synchronization with the AP (keep irqs disabled
- * while doing so):
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
+ return err;
+}
- while (!cpu_online(cpu)) {
- cpu_relax();
- touch_nmi_watchdog();
- }
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ return smp_ops.kick_ap_alive(cpu, tidle);
+}
-unreg_nmi:
- /*
- * Clean up the nmi handler. Do this after the callin and callout sync
- * to avoid impact of possible long unregister time.
- */
- if (cpu0_nmi_registered)
- unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
+ /* Cleanup possible dangling ends... */
+ if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset)
+ smpboot_restore_warm_reset_vector();
+}
- return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+ if (smp_ops.cleanup_dead_cpu)
+ smp_ops.cleanup_dead_cpu(cpu);
+
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+}
+
+void arch_cpuhp_sync_state_poll(void)
+{
+ if (smp_ops.poll_sync_state)
+ smp_ops.poll_sync_state();
}
/**
- * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ * arch_disable_smp_support() - Disables SMP support for x86 at boottime
*/
-void arch_disable_smp_support(void)
+void __init arch_disable_smp_support(void)
{
disable_ioapic_support();
}
@@ -1361,14 +1232,6 @@ static void __init smp_cpu_index_default(void)
}
}
-static void __init smp_get_logical_apicid(void)
-{
- if (x2apic_mode)
- cpu0_logical_apicid = apic_read(APIC_LDR);
- else
- cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-}
-
void __init smp_prepare_cpus_common(void)
{
unsigned int i;
@@ -1379,7 +1242,6 @@ void __init smp_prepare_cpus_common(void)
* Setup boot CPU information
*/
smp_store_boot_cpu_info(); /* Final full version of the data */
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
for_each_possible_cpu(i) {
@@ -1402,6 +1264,21 @@ void __init smp_prepare_cpus_common(void)
set_cpu_sibling_map(0);
}
+#ifdef CONFIG_X86_64
+/* Establish whether parallel bringup can be supported. */
+bool __init arch_cpuhp_init_parallel_bringup(void)
+{
+ if (!x86_cpuinit.parallel_bringup) {
+ pr_info("Parallel CPU startup disabled by the platform\n");
+ return false;
+ }
+
+ smpboot_control = STARTUP_READ_APICID;
+ pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control);
+ return true;
+}
+#endif
+
/*
* Prepare for SMP bootup.
* @max_cpus: configured maximum number of CPUs, It is a legacy parameter
@@ -1431,8 +1308,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
- smp_get_logical_apicid();
-
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
@@ -1466,9 +1341,6 @@ void __init native_smp_prepare_boot_cpu(void)
if (!IS_ENABLED(CONFIG_SMP))
switch_gdt_and_percpu_base(me);
- /* already set me in cpu_online_mask in boot_cpu_init() */
- cpumask_set_cpu(me, cpu_callout_mask);
- cpu_set_state_online(me);
native_pv_lock_init();
}
@@ -1592,6 +1464,12 @@ __init void prefill_possible_map(void)
set_cpu_possible(i, true);
}
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/* Recompute SMT state for all CPUs on offline */
@@ -1650,10 +1528,6 @@ static void remove_siblinginfo(int cpu)
static void remove_cpu_from_maps(int cpu)
{
set_cpu_online(cpu, false);
- cpumask_clear_cpu(cpu, cpu_callout_mask);
- cpumask_clear_cpu(cpu, cpu_callin_mask);
- /* was set by cpu_init() */
- cpumask_clear_cpu(cpu, cpu_initialized_mask);
numa_remove_cpu(cpu);
}
@@ -1704,54 +1578,17 @@ int native_cpu_disable(void)
return 0;
}
-int common_cpu_die(unsigned int cpu)
-{
- int ret = 0;
-
- /* We don't do anything here: idle task is faking death itself. */
-
- /* They ack this in play_dead() by setting CPU_DEAD */
- if (cpu_wait_death(cpu, 5)) {
- if (system_state == SYSTEM_RUNNING)
- pr_info("CPU %u is now offline\n", cpu);
- } else {
- pr_err("CPU %u didn't die...\n", cpu);
- ret = -1;
- }
-
- return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
- common_cpu_die(cpu);
-}
-
void play_dead_common(void)
{
idle_task_exit();
- /* Ack it */
- (void)cpu_report_death();
-
+ cpuhp_ap_report_dead();
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
}
-/**
- * cond_wakeup_cpu0 - Wake up CPU0 if needed.
- *
- * If NMI wants to wake up CPU0, start CPU0.
- */
-void cond_wakeup_cpu0(void)
-{
- if (smp_processor_id() == 0 && enable_start_cpu0)
- start_cpu0();
-}
-EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
-
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
@@ -1819,8 +1656,6 @@ static inline void mwait_play_dead(void)
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
-
- cond_wakeup_cpu0();
}
}
@@ -1829,11 +1664,8 @@ void __noreturn hlt_play_dead(void)
if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
- while (1) {
+ while (1)
native_halt();
-
- cond_wakeup_cpu0();
- }
}
void native_play_dead(void)
@@ -1852,12 +1684,6 @@ int native_cpu_disable(void)
return -ENOSYS;
}
-void native_cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-
void native_play_dead(void)
{
BUG();