summaryrefslogtreecommitdiff
path: root/arch/x86/xen/smp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/smp.c')
-rw-r--r--arch/x86/xen/smp.c310
1 files changed, 259 insertions, 51 deletions
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 233156f39b7f..d8faf79a0a1d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,6 +15,7 @@
* This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
+#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>
@@ -35,6 +36,8 @@
#include "xen-ops.h"
#include "mmu.h"
+static void __cpuinit xen_init_lock_cpu(int cpu);
+
cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
@@ -66,13 +69,22 @@ static __cpuinit void cpu_bringup_and_idle(void)
int cpu = smp_processor_id();
cpu_init();
+ preempt_disable();
+
xen_enable_sysenter();
+ xen_enable_syscall();
- preempt_disable();
- per_cpu(cpu_state, cpu) = CPU_ONLINE;
+ cpu = smp_processor_id();
+ smp_store_cpu_info(cpu);
+ cpu_data(cpu).x86_max_cores = 1;
+ set_cpu_sibling_map(cpu);
xen_setup_cpu_clockevents();
+ cpu_set(cpu, cpu_online_map);
+ x86_write_percpu(cpu_state, CPU_ONLINE);
+ wmb();
+
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -141,56 +153,39 @@ static int xen_smp_intr_init(unsigned int cpu)
return rc;
}
-void __init xen_fill_possible_map(void)
+static void __init xen_fill_possible_map(void)
{
int i, rc;
for (i = 0; i < NR_CPUS; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0)
+ if (rc >= 0) {
+ num_processors++;
cpu_set(i, cpu_possible_map);
+ }
}
}
-void __init xen_smp_prepare_boot_cpu(void)
+static void __init xen_smp_prepare_boot_cpu(void)
{
- int cpu;
-
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
- make_lowmem_page_readwrite(&per_cpu__gdt_page);
-
- for_each_possible_cpu(cpu) {
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- /*
- * cpu_core_map lives in a per cpu area that is cleared
- * when the per cpu array is allocated.
- *
- * cpus_clear(per_cpu(cpu_core_map, cpu));
- */
- }
+ make_lowmem_page_readwrite(&per_cpu_var(gdt_page));
xen_setup_vcpu_info_placement();
}
-void __init xen_smp_prepare_cpus(unsigned int max_cpus)
+static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
- for_each_possible_cpu(cpu) {
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- /*
- * cpu_core_ map will be zeroed when the per
- * cpu area is allocated.
- *
- * cpus_clear(per_cpu(cpu_core_map, cpu));
- */
- }
+ xen_init_lock_cpu(0);
smp_store_cpu_info(0);
+ cpu_data(0).x86_max_cores = 1;
set_cpu_sibling_map(0);
if (xen_smp_intr_init(0))
@@ -225,7 +220,7 @@ static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
struct vcpu_guest_context *ctxt;
- struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
+ struct desc_struct *gdt;
if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
return 0;
@@ -234,12 +229,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
if (ctxt == NULL)
return -ENOMEM;
+ gdt = get_cpu_gdt_table(cpu);
+
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = 0;
ctxt->user_regs.ss = __KERNEL_DS;
+#ifdef CONFIG_X86_32
+ ctxt->user_regs.fs = __KERNEL_PERCPU;
+#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
@@ -249,11 +247,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->ldt_ents = 0;
- BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
- make_lowmem_page_readonly(gdt->gdt);
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ make_lowmem_page_readonly(gdt);
- ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
- ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt);
+ ctxt->gdt_frames[0] = virt_to_mfn(gdt);
+ ctxt->gdt_ents = GDT_ENTRIES;
ctxt->user_regs.cs = __KERNEL_CS;
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
@@ -261,9 +259,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->kernel_ss = __KERNEL_DS;
ctxt->kernel_sp = idle->thread.sp0;
+#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_cs = __KERNEL_CS;
+#endif
+ ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
@@ -276,7 +276,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
}
-int __cpuinit xen_cpu_up(unsigned int cpu)
+static int __cpuinit xen_cpu_up(unsigned int cpu)
{
struct task_struct *idle = idle_task(cpu);
int rc;
@@ -287,10 +287,28 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
return rc;
#endif
+#ifdef CONFIG_X86_64
+ /* Allocate node local memory for AP pdas */
+ WARN_ON(cpu == 0);
+ if (cpu > 0) {
+ rc = get_local_pda(cpu);
+ if (rc)
+ return rc;
+ }
+#endif
+
+#ifdef CONFIG_X86_32
init_gdt(cpu);
per_cpu(current_task, cpu) = idle;
irq_ctx_init(cpu);
+#else
+ cpu_pda(cpu)->pcurrent = idle;
+ clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
xen_setup_timer(cpu);
+ xen_init_lock_cpu(cpu);
+
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* make sure interrupts start blocked */
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -306,20 +324,18 @@ int __cpuinit xen_cpu_up(unsigned int cpu)
if (rc)
return rc;
- smp_store_cpu_info(cpu);
- set_cpu_sibling_map(cpu);
- /* This must be done before setting cpu_online_map */
- wmb();
-
- cpu_set(cpu, cpu_online_map);
-
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
BUG_ON(rc);
+ while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ barrier();
+ }
+
return 0;
}
-void xen_smp_cpus_done(unsigned int max_cpus)
+static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
@@ -335,12 +351,12 @@ static void stop_self(void *v)
BUG();
}
-void xen_smp_send_stop(void)
+static void xen_smp_send_stop(void)
{
smp_call_function(stop_self, NULL, 0);
}
-void xen_smp_send_reschedule(int cpu)
+static void xen_smp_send_reschedule(int cpu)
{
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
@@ -351,18 +367,18 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
cpus_and(mask, mask, cpu_online_map);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu_mask_nr(cpu, mask)
xen_send_IPI_one(cpu, vector);
}
-void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(cpumask_t mask)
{
int cpu;
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
/* Make sure other vcpus get a chance to run if they need to. */
- for_each_cpu_mask(cpu, mask) {
+ for_each_cpu_mask_nr(cpu, mask) {
if (xen_vcpu_stolen(cpu)) {
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
break;
@@ -370,7 +386,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask)
}
}
-void xen_smp_send_call_function_single_ipi(int cpu)
+static void xen_smp_send_call_function_single_ipi(int cpu)
{
xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
}
@@ -379,7 +395,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_interrupt();
+#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
irq_exit();
return IRQ_HANDLED;
@@ -389,8 +409,196 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
irq_exit();
return IRQ_HANDLED;
}
+
+struct xen_spinlock {
+ unsigned char lock; /* 0 -> free; 1 -> locked */
+ unsigned short spinners; /* count of waiting cpus */
+};
+
+static int xen_spin_is_locked(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ return xl->lock != 0;
+}
+
+static int xen_spin_is_contended(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ /* Not strictly true; this is only the count of contended
+ lock-takers entering the slow path. */
+ return xl->spinners != 0;
+}
+
+static int xen_spin_trylock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ u8 old = 1;
+
+ asm("xchgb %b0,%1"
+ : "+q" (old), "+m" (xl->lock) : : "memory");
+
+ return old == 0;
+}
+
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+
+static inline void spinning_lock(struct xen_spinlock *xl)
+{
+ __get_cpu_var(lock_spinners) = xl;
+ wmb(); /* set lock of interest before count */
+ asm(LOCK_PREFIX " incw %0"
+ : "+m" (xl->spinners) : : "memory");
+}
+
+static inline void unspinning_lock(struct xen_spinlock *xl)
+{
+ asm(LOCK_PREFIX " decw %0"
+ : "+m" (xl->spinners) : : "memory");
+ wmb(); /* decrement count before clearing lock */
+ __get_cpu_var(lock_spinners) = NULL;
+}
+
+static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int irq = __get_cpu_var(lock_kicker_irq);
+ int ret;
+
+ /* If kicker interrupts not initialized yet, just spin */
+ if (irq == -1)
+ return 0;
+
+ /* announce we're spinning */
+ spinning_lock(xl);
+
+ /* clear pending */
+ xen_clear_irq_pending(irq);
+
+ /* check again make sure it didn't become free while
+ we weren't looking */
+ ret = xen_spin_trylock(lock);
+ if (ret)
+ goto out;
+
+ /* block until irq becomes pending */
+ xen_poll_irq(irq);
+ kstat_this_cpu.irqs[irq]++;
+
+out:
+ unspinning_lock(xl);
+ return ret;
+}
+
+static void xen_spin_lock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+ int timeout;
+ u8 oldval;
+
+ do {
+ timeout = 1 << 10;
+
+ asm("1: xchgb %1,%0\n"
+ " testb %1,%1\n"
+ " jz 3f\n"
+ "2: rep;nop\n"
+ " cmpb $0,%0\n"
+ " je 1b\n"
+ " dec %2\n"
+ " jnz 2b\n"
+ "3:\n"
+ : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
+ : "1" (1)
+ : "memory");
+
+ } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
+}
+
+static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* XXX should mix up next cpu selection */
+ if (per_cpu(lock_spinners, cpu) == xl) {
+ xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+ break;
+ }
+ }
+}
+
+static void xen_spin_unlock(struct raw_spinlock *lock)
+{
+ struct xen_spinlock *xl = (struct xen_spinlock *)lock;
+
+ smp_wmb(); /* make sure no writes get moved after unlock */
+ xl->lock = 0; /* release lock */
+
+ /* make sure unlock happens before kick */
+ barrier();
+
+ if (unlikely(xl->spinners))
+ xen_spin_unlock_slow(xl);
+}
+
+static __cpuinit void xen_init_lock_cpu(int cpu)
+{
+ int irq;
+ const char *name;
+
+ name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
+ irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
+ cpu,
+ xen_reschedule_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ name,
+ NULL);
+
+ if (irq >= 0) {
+ disable_irq(irq); /* make sure it's never delivered */
+ per_cpu(lock_kicker_irq, cpu) = irq;
+ }
+
+ printk("cpu %d spinlock event irq %d\n", cpu, irq);
+}
+
+static void __init xen_init_spinlocks(void)
+{
+ pv_lock_ops.spin_is_locked = xen_spin_is_locked;
+ pv_lock_ops.spin_is_contended = xen_spin_is_contended;
+ pv_lock_ops.spin_lock = xen_spin_lock;
+ pv_lock_ops.spin_trylock = xen_spin_trylock;
+ pv_lock_ops.spin_unlock = xen_spin_unlock;
+}
+
+static const struct smp_ops xen_smp_ops __initdata = {
+ .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = xen_smp_prepare_cpus,
+ .cpu_up = xen_cpu_up,
+ .smp_cpus_done = xen_smp_cpus_done,
+
+ .smp_send_stop = xen_smp_send_stop,
+ .smp_send_reschedule = xen_smp_send_reschedule,
+
+ .send_call_func_ipi = xen_smp_send_call_function_ipi,
+ .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
+};
+
+void __init xen_smp_init(void)
+{
+ smp_ops = xen_smp_ops;
+ xen_fill_possible_map();
+ xen_init_spinlocks();
+}