From bb6eddf7676e1c1f3e637aa93c5224488d99036f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 15:35:10 +0100 Subject: clockevents: Prevent clockevent_devices list corruption on cpu hotplug Xiaotian Feng triggered a list corruption in the clock events list on CPU hotplug and debugged the root cause. If a CPU registers more than one per cpu clock event device, then only the active clock event device is removed on CPU_DEAD. The unused devices are kept in the clock events device list. On CPU up the clock event devices are registered again, which means that we list_add an already enqueued list_head. That results in list corruption. Resolve this by removing all devices which are associated to the dead CPU on CPU_DEAD. Reported-by: Xiaotian Feng Signed-off-by: Thomas Gleixner Tested-by: Xiaotian Feng Cc: stable@kernel.org --- kernel/time/clockevents.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 20a8920029ee..91db2e33d86a 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old, */ void clockevents_notify(unsigned long reason, void *arg) { - struct list_head *node, *tmp; + struct clock_event_device *dev, *tmp; unsigned long flags; + int cpu; spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); @@ -250,8 +251,19 @@ void clockevents_notify(unsigned long reason, void *arg) * Unregister the clock event devices which were * released from the users in the notify chain. */ - list_for_each_safe(node, tmp, &clockevents_released) - list_del(node); + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + cpu = *((int *)arg); + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1) { + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + list_del(&dev->list); + } + } break; default: break; -- cgit v1.2.3 From ecb49d1a639acbacfc3771cae5ec07bed5df3847 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2009 16:36:54 +0100 Subject: hrtimers: Convert to raw_spinlocks Convert locks which cannot be sleeping locks in preempt-rt to raw_spinlocks. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 50 +++++++++++++++++++++++------------------------ kernel/time/timer_list.c | 6 +++--- kernel/time/timer_stats.c | 17 ++++++++-------- 4 files changed, 38 insertions(+), 37 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index af634e95871d..5d86fb2309d2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -169,7 +169,7 @@ struct hrtimer_clock_base { * @max_hang_time: Maximum time spent in hrtimer_interrupt */ struct hrtimer_cpu_base { - spinlock_t lock; + raw_spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d2f9239dc6ba..0086628b6e97 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, for (;;) { base = timer->base; if (likely(base != NULL)) { - spin_lock_irqsave(&base->cpu_base->lock, *flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* The timer has migrated to another CPU: */ - spin_unlock_irqrestore(&base->cpu_base->lock, *flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); } cpu_relax(); } @@ -208,13 +208,13 @@ again: /* See the comment in lock_timer_base() */ timer->base = NULL; - spin_unlock(&base->cpu_base->lock); - spin_lock(&new_base->cpu_base->lock); + raw_spin_unlock(&base->cpu_base->lock); + raw_spin_lock(&new_base->cpu_base->lock); if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { cpu = this_cpu; - spin_unlock(&new_base->cpu_base->lock); - spin_lock(&base->cpu_base->lock); + raw_spin_unlock(&new_base->cpu_base->lock); + raw_spin_lock(&base->cpu_base->lock); timer->base = base; goto again; } @@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { struct hrtimer_clock_base *base = timer->base; - spin_lock_irqsave(&base->cpu_base->lock, *flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); return base; } @@ -628,12 +628,12 @@ static void retrigger_next_event(void *arg) base = &__get_cpu_var(hrtimer_bases); /* Adjust CLOCK_REALTIME offset */ - spin_lock(&base->lock); + raw_spin_lock(&base->lock); base->clock_base[CLOCK_REALTIME].offset = timespec_to_ktime(realtime_offset); hrtimer_force_reprogram(base, 0); - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); } /* @@ -694,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { if (wakeup) { - spin_unlock(&base->cpu_base->lock); + raw_spin_unlock(&base->cpu_base->lock); raise_softirq_irqoff(HRTIMER_SOFTIRQ); - spin_lock(&base->cpu_base->lock); + raw_spin_lock(&base->cpu_base->lock); } else __raise_softirq_irqoff(HRTIMER_SOFTIRQ); @@ -790,7 +790,7 @@ static inline void timer_stats_account_hrtimer(struct hrtimer *timer) static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { - spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); + raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } /** @@ -1123,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void) unsigned long flags; int i; - spin_lock_irqsave(&cpu_base->lock, flags); + raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!hrtimer_hres_active()) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { @@ -1140,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void) } } - spin_unlock_irqrestore(&cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); if (mindelta.tv64 < 0) mindelta.tv64 = 0; @@ -1222,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) * they get migrated to another cpu, therefore its safe to unlock * the timer base. */ - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); trace_hrtimer_expire_entry(timer, now); restart = fn(timer); trace_hrtimer_expire_exit(timer); - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); /* * Note: We clear the CALLBACK bit after enqueue_hrtimer and @@ -1261,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) retry: expires_next.tv64 = KTIME_MAX; - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1317,7 +1317,7 @@ retry: * against it. */ cpu_base->expires_next = expires_next; - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ if (expires_next.tv64 == KTIME_MAX || @@ -1457,7 +1457,7 @@ void hrtimer_run_queues(void) gettime = 0; } - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; @@ -1469,7 +1469,7 @@ void hrtimer_run_queues(void) __run_hrtimer(timer, &base->softirq_time); } - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); } } @@ -1625,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - spin_lock_init(&cpu_base->lock); + raw_spin_lock_init(&cpu_base->lock); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; @@ -1683,16 +1683,16 @@ static void migrate_hrtimers(int scpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&new_base->lock); + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 9d80db4747d4..28265636b6c2 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; - spin_lock_irqsave(&base->cpu_base->lock, flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = base->first; /* @@ -100,13 +100,13 @@ next_one: timer = rb_entry(curr, struct hrtimer, node); tmp = *timer; - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); print_timer(m, timer, &tmp, i, now); next++; goto next_one; } - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); } static void diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 63b117e9eba1..2f3b585b8d7d 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock); /* * Per-CPU lookup locks for fast hash lookup: */ -static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock); +static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); /* * Mutex to serialize state changes with show-stats activities: @@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, /* * It doesnt matter which lock we take: */ - spinlock_t *lock; + raw_spinlock_t *lock; struct entry *entry, input; unsigned long flags; @@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, input.pid = pid; input.timer_flag = timer_flag; - spin_lock_irqsave(lock, flags); + raw_spin_lock_irqsave(lock, flags); if (!timer_stats_active) goto out_unlock; @@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, atomic_inc(&overflow_count); out_unlock: - spin_unlock_irqrestore(lock, flags); + raw_spin_unlock_irqrestore(lock, flags); } static void print_name_offset(struct seq_file *m, unsigned long addr) @@ -348,10 +348,11 @@ static void sync_access(void) int cpu; for_each_online_cpu(cpu) { - spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - spin_lock_irqsave(lock, flags); + raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); + + raw_spin_lock_irqsave(lock, flags); /* nothing */ - spin_unlock_irqrestore(lock, flags); + raw_spin_unlock_irqrestore(lock, flags); } } @@ -409,7 +410,7 @@ void __init init_timer_stats(void) int cpu; for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); + raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); } static int __init init_tstats_procfs(void) -- cgit v1.2.3 From d192c47f25daa98996c7eae543d8a27e41247ec2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 8 Dec 2009 12:49:26 +0100 Subject: clockevents: Make tick_device_lock static Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar --- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 83c4417b6a3c..af39cf1cfa50 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); ktime_t tick_next_period; ktime_t tick_period; int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; -DEFINE_SPINLOCK(tick_device_lock); +static DEFINE_SPINLOCK(tick_device_lock); /* * Debugging: see timer_list.c diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index b1c05bf75ee0..290eefbc1f60 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,7 +6,6 @@ #define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); -extern spinlock_t tick_device_lock; extern ktime_t tick_next_period; extern ktime_t tick_period; extern int tick_do_timer_cpu __read_mostly; -- cgit v1.2.3 From b5f91da0a6973bb6f9ff3b91b0e92c0773a458f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 8 Dec 2009 12:40:31 +0100 Subject: clockevents: Convert to raw_spinlock Convert locks which cannot be sleeping locks in preempt-rt to raw_spinlocks. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar --- kernel/time/clockevents.c | 14 +++++++------- kernel/time/tick-broadcast.c | 42 +++++++++++++++++++++--------------------- kernel/time/tick-common.c | 20 ++++++++++---------- 3 files changed, 38 insertions(+), 38 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 20a8920029ee..3d5fc0fd1cca 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -30,7 +30,7 @@ static LIST_HEAD(clockevents_released); static RAW_NOTIFIER_HEAD(clockevents_chain); /* Protection for the above */ -static DEFINE_SPINLOCK(clockevents_lock); +static DEFINE_RAW_SPINLOCK(clockevents_lock); /** * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds @@ -141,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb) unsigned long flags; int ret; - spin_lock_irqsave(&clockevents_lock, flags); + raw_spin_lock_irqsave(&clockevents_lock, flags); ret = raw_notifier_chain_register(&clockevents_chain, nb); - spin_unlock_irqrestore(&clockevents_lock, flags); + raw_spin_unlock_irqrestore(&clockevents_lock, flags); return ret; } @@ -185,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); - spin_lock_irqsave(&clockevents_lock, flags); + raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); clockevents_notify_released(); - spin_unlock_irqrestore(&clockevents_lock, flags); + raw_spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_register_device); @@ -241,7 +241,7 @@ void clockevents_notify(unsigned long reason, void *arg) struct list_head *node, *tmp; unsigned long flags; - spin_lock_irqsave(&clockevents_lock, flags); + raw_spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); switch (reason) { @@ -256,7 +256,7 @@ void clockevents_notify(unsigned long reason, void *arg) default: break; } - spin_unlock_irqrestore(&clockevents_lock, flags); + raw_spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_notify); #endif diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c2ec25087a35..b3bafd5fc66d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device; /* FIXME: Use cpumask_var_t. */ static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); static DECLARE_BITMAP(tmpmask, NR_CPUS); -static DEFINE_SPINLOCK(tick_broadcast_lock); +static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT @@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) unsigned long flags; int ret = 0; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Devices might be registered with both periodic and oneshot @@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) tick_broadcast_clear_oneshot(cpu); } } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; } @@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask) */ static void tick_do_periodic_broadcast(void) { - spin_lock(&tick_broadcast_lock); + raw_spin_lock(&tick_broadcast_lock); cpumask_and(to_cpumask(tmpmask), cpu_online_mask, tick_get_broadcast_mask()); tick_do_broadcast(to_cpumask(tmpmask)); - spin_unlock(&tick_broadcast_lock); + raw_spin_unlock(&tick_broadcast_lock); } /* @@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) unsigned long flags; int cpu, bc_stopped; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); @@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason) tick_broadcast_setup_oneshot(bc); } out: - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) unsigned long flags; unsigned int cpu = *cpup; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); @@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) clockevents_shutdown(bc); } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } void tick_suspend_broadcast(void) @@ -317,13 +317,13 @@ void tick_suspend_broadcast(void) struct clock_event_device *bc; unsigned long flags; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc) clockevents_shutdown(bc); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } int tick_resume_broadcast(void) @@ -332,7 +332,7 @@ int tick_resume_broadcast(void) unsigned long flags; int broadcast = 0; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; @@ -351,7 +351,7 @@ int tick_resume_broadcast(void) break; } } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return broadcast; } @@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) ktime_t now, next_event; int cpu; - spin_lock(&tick_broadcast_lock); + raw_spin_lock(&tick_broadcast_lock); again: dev->next_event.tv64 = KTIME_MAX; next_event.tv64 = KTIME_MAX; @@ -443,7 +443,7 @@ again: if (tick_broadcast_set_event(next_event, 0)) goto again; } - spin_unlock(&tick_broadcast_lock); + raw_spin_unlock(&tick_broadcast_lock); } /* @@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) unsigned long flags; int cpu; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Periodic mode does not care about the enter/exit of power @@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) } out: - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void) struct clock_event_device *bc; unsigned long flags; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } @@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) unsigned long flags; unsigned int cpu = *cpup; - spin_lock_irqsave(&tick_broadcast_lock, flags); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* * Clear the broadcast mask flag for the dead cpu, but do not @@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) */ cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); - spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index af39cf1cfa50..b6b898d2eeef 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device); ktime_t tick_next_period; ktime_t tick_period; int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; -static DEFINE_SPINLOCK(tick_device_lock); +static DEFINE_RAW_SPINLOCK(tick_device_lock); /* * Debugging: see timer_list.c @@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) int cpu, ret = NOTIFY_OK; unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + raw_spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, newdev->cpumask)) @@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); - spin_unlock_irqrestore(&tick_device_lock, flags); + raw_spin_unlock_irqrestore(&tick_device_lock, flags); return NOTIFY_STOP; out_bc: @@ -278,7 +278,7 @@ out_bc: if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; - spin_unlock_irqrestore(&tick_device_lock, flags); + raw_spin_unlock_irqrestore(&tick_device_lock, flags); return ret; } @@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup) struct clock_event_device *dev = td->evtdev; unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + raw_spin_lock_irqsave(&tick_device_lock, flags); td->mode = TICKDEV_MODE_PERIODIC; if (dev) { /* @@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - spin_unlock_irqrestore(&tick_device_lock, flags); + raw_spin_unlock_irqrestore(&tick_device_lock, flags); } static void tick_suspend(void) @@ -330,9 +330,9 @@ static void tick_suspend(void) struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; - spin_lock_irqsave(&tick_device_lock, flags); + raw_spin_lock_irqsave(&tick_device_lock, flags); clockevents_shutdown(td->evtdev); - spin_unlock_irqrestore(&tick_device_lock, flags); + raw_spin_unlock_irqrestore(&tick_device_lock, flags); } static void tick_resume(void) @@ -341,7 +341,7 @@ static void tick_resume(void) unsigned long flags; int broadcast = tick_resume_broadcast(); - spin_lock_irqsave(&tick_device_lock, flags); + raw_spin_lock_irqsave(&tick_device_lock, flags); clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); if (!broadcast) { @@ -350,7 +350,7 @@ static void tick_resume(void) else tick_resume_oneshot(); } - spin_unlock_irqrestore(&tick_device_lock, flags); + raw_spin_unlock_irqrestore(&tick_device_lock, flags); } /* -- cgit v1.2.3 From f065f41f48569122b5bcddbd1ba2354f7cc29fdc Mon Sep 17 00:00:00 2001 From: Barry Song <21cnbao@gmail.com> Date: Tue, 15 Dec 2009 16:45:34 -0800 Subject: timecompare: fix half-Y2K38 problem in timecompare_update while calculating offset ktime will overflow from 03:14:07 UTC on Tuesday, 19 January 2038, ktime_add() in timecompare_update() will overflow a half earlier. As a result, wrong offset will be gotten, then cause some strange problems. Signed-off-by: Barry Song <21cnbao@gmail.com> Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Patrick Ohly Cc: David S. Miller Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timecompare.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index 96ff643a5a59..12f5c55090be 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync, * source time */ sample.offset = - ktime_to_ns(ktime_add(end, start)) / 2 - + (ktime_to_ns(end) + ktime_to_ns(start)) / 2 - ts; /* simple insertion sort based on duration */ -- cgit v1.2.3 From 62ac12795095dc959649c66ace78708e7ac52477 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 17 Dec 2009 11:43:26 -0600 Subject: cpumask: avoid dereferencing struct cpumask struct cpumask will be undefined soon with CONFIG_CPUMASK_OFFSTACK=y, to avoid them being declared on the stack. cpumask_bits() does what we want here (of course, this code is crap). Signed-off-by: Rusty Russell To: Thomas Gleixner --- kernel/time/timer_list.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 28265636b6c2..bdfb8dd1050c 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -237,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST print_tickdevice(m, tick_get_broadcast_device(), -1); SEQ_printf(m, "tick_broadcast_mask: %08lx\n", - tick_get_broadcast_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_mask())[0]); #ifdef CONFIG_TICK_ONESHOT SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", - tick_get_broadcast_oneshot_mask()->bits[0]); + cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); #endif SEQ_printf(m, "\n"); #endif -- cgit v1.2.3 From 83f57a11d84460dfe2afdb5a8bc759953428e38b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 22 Dec 2009 14:10:37 -0800 Subject: Revert "time: Remove xtime_cache" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7bc7d637452383d56ba4368d4336b0dde1bb476d, as requested by John Stultz. Quoting John: "Petr Titěra reported an issue where he saw odd atime regressions with 2.6.33 where there were a full second worth of nanoseconds in the nanoseconds field. He also reviewed the time code and narrowed down the problem: unhandled overflow of the nanosecond field caused by rounding up the sub-nanosecond accumulated time. Details: * At the end of update_wall_time(), we currently round up the sub-nanosecond portion of accumulated time when storing it into xtime. This was added to avoid time inconsistencies caused when the sub-nanosecond portion was truncated when storing into xtime. Unfortunately we don't handle the possible second overflow caused by that rounding. * Previously the xtime_cache code hid this overflow by normalizing the xtime value when storing into the xtime_cache. * We could try to handle the second overflow after the rounding up, but since this affects the timekeeping's internal state, this would further complicate the next accumulation cycle, causing small errors in ntp steering. As much as I'd like to get rid of it, the xtime_cache code is known to work. * The correct fix is really to include the sub-nanosecond portion in the timekeeping accessor function, so we don't need to round up at during accumulation. This would greatly simplify the accumulation code. Unfortunately, we can't do this safely until the last three non-GENERIC_TIME arches (sparc32, arm, cris) are converted (those patches are in -mm) and we kill off the spots where arches set xtime directly. This is all 2.6.34 material, so I think reverting the xtime_cache change is the best approach for now. Many thanks to Petr for both reporting and finding the issue!" Reported-by: Petr Titěra Requested-by: john stultz Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/time.c | 1 + kernel/time/timekeeping.c | 27 +++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time.c b/kernel/time.c index c6324d96009e..804798005d19 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -136,6 +136,7 @@ static inline void warp_clock(void) write_seqlock_irq(&xtime_lock); wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; xtime.tv_sec += sys_tz.tz_minuteswest * 60; + update_xtime_cache(0); write_sequnlock_irq(&xtime_lock); clock_was_set(); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index af4135f05825..7faaa32fbf4f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -165,6 +165,13 @@ struct timespec raw_time; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +static struct timespec xtime_cache __attribute__ ((aligned (16))); +void update_xtime_cache(u64 nsec) +{ + xtime_cache = xtime; + timespec_add_ns(&xtime_cache, nsec); +} + /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { @@ -325,6 +332,8 @@ int do_settimeofday(struct timespec *tv) xtime = *tv; + update_xtime_cache(0); + timekeeper.ntp_error = 0; ntp_clear(); @@ -550,6 +559,7 @@ void __init timekeeping_init(void) } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -583,6 +593,7 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts); } + update_xtime_cache(0); /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; @@ -722,6 +733,7 @@ static void timekeeping_adjust(s64 offset) timekeeper.ntp_error_shift; } + /** * logarithmic_accumulation - shifted accumulation of cycles * @@ -765,6 +777,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) return offset; } + /** * update_wall_time - Uses the current clocksource to increment the wall time * @@ -774,6 +787,7 @@ void update_wall_time(void) { struct clocksource *clock; cycle_t offset; + u64 nsecs; int shift = 0, maxshift; /* Make sure we're fully resumed: */ @@ -839,6 +853,9 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); + update_xtime_cache(nsecs); + /* check to see if there is a new clocksource to use */ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); } @@ -875,13 +892,13 @@ void monotonic_to_bootbased(struct timespec *ts) unsigned long get_seconds(void) { - return xtime.tv_sec; + return xtime_cache.tv_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return xtime; + return xtime_cache; } struct timespec current_kernel_time(void) @@ -891,7 +908,8 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime; + + now = xtime_cache; } while (read_seqretry(&xtime_lock, seq)); return now; @@ -905,7 +923,8 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime; + + now = xtime_cache; mono = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); -- cgit v1.2.3