summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/sched/syscalls.c8
-rw-r--r--kernel/signal.c34
-rw-r--r--kernel/sys.c2
-rw-r--r--kernel/time/alarmtimer.c9
-rw-r--r--kernel/time/clockevents.c2
-rw-r--r--kernel/time/hrtimer.c22
-rw-r--r--kernel/time/ntp.c10
-rw-r--r--kernel/time/ntp_internal.h4
-rw-r--r--kernel/time/posix-cpu-timers.c207
-rw-r--r--kernel/time/posix-timers.c73
-rw-r--r--kernel/time/posix-timers.h3
-rw-r--r--kernel/time/timekeeping.c4
-rw-r--r--kernel/time/timer.c62
15 files changed, 231 insertions, 213 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0c9c5dfc8ddd..d293d52a3e00 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -330,7 +330,7 @@ static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state st
/* Poll for one millisecond */
arch_cpuhp_sync_state_poll();
} else {
- usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
+ usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
}
sync = atomic_read(st);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index cc760491f201..c1b343cba560 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1861,7 +1861,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
prev_cputime_init(&sig->prev_cputime);
#ifdef CONFIG_POSIX_TIMERS
- INIT_LIST_HEAD(&sig->posix_timers);
+ INIT_HLIST_HEAD(&sig->posix_timers);
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sig->real_timer.function = it_real_fn;
#endif
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index ae1b42775ef9..195d2f2834a9 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -406,6 +406,14 @@ static void __setscheduler_params(struct task_struct *p,
else if (fair_policy(policy))
p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+ /* rt-policy tasks do not have a timerslack */
+ if (task_is_realtime(p)) {
+ p->timer_slack_ns = 0;
+ } else if (p->timer_slack_ns == 0) {
+ /* when switching back to non-rt policy, restore timerslack */
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ }
+
/*
* __sched_setscheduler() ensures attr->sched_priority == 0 when
* !rt_policy. Always setting this ensures that things like
diff --git a/kernel/signal.c b/kernel/signal.c
index 60c737e423a1..6f3a5aa39b09 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -618,20 +618,18 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
}
/*
- * Dequeue a signal and return the element to the caller, which is
- * expected to free it.
- *
- * All callers have to hold the siglock.
+ * Try to dequeue a signal. If a deliverable signal is found fill in the
+ * caller provided siginfo and return the signal number. Otherwise return
+ * 0.
*/
-int dequeue_signal(struct task_struct *tsk, sigset_t *mask,
- kernel_siginfo_t *info, enum pid_type *type)
+int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type)
{
+ struct task_struct *tsk = current;
bool resched_timer = false;
int signr;
- /* We only dequeue private signals from ourselves, we don't let
- * signalfd steal them
- */
+ lockdep_assert_held(&tsk->sighand->siglock);
+
*type = PIDTYPE_PID;
signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
if (!signr) {
@@ -1940,10 +1938,11 @@ struct sigqueue *sigqueue_alloc(void)
void sigqueue_free(struct sigqueue *q)
{
- unsigned long flags;
spinlock_t *lock = &current->sighand->siglock;
+ unsigned long flags;
- BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+ if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
+ return;
/*
* We must hold ->siglock while testing q->list
* to serialize with collect_signal() or with
@@ -1971,7 +1970,10 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
unsigned long flags;
int ret, result;
- BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+ if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
+ return 0;
+ if (WARN_ON_ONCE(q->info.si_code != SI_TIMER))
+ return 0;
ret = -1;
rcu_read_lock();
@@ -2006,7 +2008,6 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
* If an SI_TIMER entry is already queue just increment
* the overrun count.
*/
- BUG_ON(q->info.si_code != SI_TIMER);
q->info.si_overrun++;
result = TRACE_SIGNAL_ALREADY_PENDING;
goto out;
@@ -2793,8 +2794,7 @@ relock:
type = PIDTYPE_PID;
signr = dequeue_synchronous_signal(&ksig->info);
if (!signr)
- signr = dequeue_signal(current, &current->blocked,
- &ksig->info, &type);
+ signr = dequeue_signal(&current->blocked, &ksig->info, &type);
if (!signr)
break; /* will return 0 */
@@ -3648,7 +3648,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
signotset(&mask);
spin_lock_irq(&tsk->sighand->siglock);
- sig = dequeue_signal(tsk, &mask, info, &type);
+ sig = dequeue_signal(&mask, info, &type);
if (!sig && timeout) {
/*
* None ready, temporarily unblock those we're interested
@@ -3667,7 +3667,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, &tsk->real_blocked);
sigemptyset(&tsk->real_blocked);
- sig = dequeue_signal(tsk, &mask, info, &type);
+ sig = dequeue_signal(&mask, info, &type);
}
spin_unlock_irq(&tsk->sighand->siglock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 3a2df1bd9f64..e3c4cffb520c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2557,6 +2557,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
error = current->timer_slack_ns;
break;
case PR_SET_TIMERSLACK:
+ if (task_is_realtime(current))
+ break;
if (arg2 <= 0)
current->timer_slack_ns =
current->default_timer_slack_ns;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5abfa4390673..8bf888641694 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -493,7 +493,7 @@ static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throt
* promised in the context of posix_timer_fn() never
* materialized, but someone should really work on it.
*
- * To prevent DOS fake @now to be 1 jiffie out which keeps
+ * To prevent DOS fake @now to be 1 jiffy out which keeps
* the overrun accounting correct but creates an
* inconsistency vs. timer_gettime(2).
*/
@@ -574,15 +574,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
it.alarm.alarmtimer);
enum alarmtimer_restart result = ALARMTIMER_NORESTART;
unsigned long flags;
- int si_private = 0;
spin_lock_irqsave(&ptr->it_lock, flags);
- ptr->it_active = 0;
- if (ptr->it_interval)
- si_private = ++ptr->it_requeue_pending;
-
- if (posix_timer_event(ptr, si_private) && ptr->it_interval) {
+ if (posix_timer_queue_signal(ptr) && ptr->it_interval) {
/*
* Handle ignored signals and rearm the timer. This will go
* away once we handle ignored signals proper. Ensure that
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 60a6484831b1..78c7bd64d0dd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -190,7 +190,7 @@ int clockevents_tick_resume(struct clock_event_device *dev)
#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
-/* Limit min_delta to a jiffie */
+/* Limit min_delta to a jiffy */
#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
/**
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 836157e09e25..12eb40d6290e 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1177,7 +1177,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
/*
* CONFIG_TIME_LOW_RES indicates that the system has no way to return
* granular time values. For relative timers we add hrtimer_resolution
- * (i.e. one jiffie) to prevent short timeouts.
+ * (i.e. one jiffy) to prevent short timeouts.
*/
timer->is_rel = mode & HRTIMER_MODE_REL;
if (timer->is_rel)
@@ -1351,11 +1351,13 @@ static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
}
static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+ __acquires(&base->softirq_expiry_lock)
{
spin_lock(&base->softirq_expiry_lock);
}
static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+ __releases(&base->softirq_expiry_lock)
{
spin_unlock(&base->softirq_expiry_lock);
}
@@ -2072,14 +2074,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
struct restart_block *restart;
struct hrtimer_sleeper t;
int ret = 0;
- u64 slack;
-
- slack = current->timer_slack_ns;
- if (rt_task(current))
- slack = 0;
hrtimer_init_sleeper_on_stack(&t, clockid, mode);
- hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
+ hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
goto out;
@@ -2249,7 +2246,7 @@ void __init hrtimers_init(void)
/**
* schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t)
- * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta: slack in expires timeout (ktime_t)
* @mode: timer mode
* @clock_id: timer clock to be used
*/
@@ -2276,13 +2273,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
return -EINTR;
}
- /*
- * Override any slack passed by the user if under
- * rt contraints.
- */
- if (rt_task(current))
- delta = 0;
-
hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_sleeper_start_expires(&t, mode);
@@ -2302,7 +2292,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
/**
* schedule_hrtimeout_range - sleep until timeout
* @expires: timeout value (ktime_t)
- * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta: slack in expires timeout (ktime_t)
* @mode: timer mode
*
* Make the current task sleep until the given expiry time has
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8d2dd214ec68..802b336f4b8c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -660,9 +660,17 @@ rearm:
sched_sync_hw_clock(offset_nsec, res != 0);
}
-void ntp_notify_cmos_timer(void)
+void ntp_notify_cmos_timer(bool offset_set)
{
/*
+ * If the time jumped (using ADJ_SETOFFSET) cancels sync timer,
+ * which may have been running if the time was synchronized
+ * prior to the ADJ_SETOFFSET call.
+ */
+ if (offset_set)
+ hrtimer_cancel(&sync_hrtimer);
+
+ /*
* When the work is currently executed but has not yet the timer
* rearmed this queues the work immediately again. No big issue,
* just a pointless work scheduled.
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 23d1b74c3065..5a633dce9057 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -14,9 +14,9 @@ extern int __do_adjtimex(struct __kernel_timex *txc,
extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
-extern void ntp_notify_cmos_timer(void);
+extern void ntp_notify_cmos_timer(bool offset_set);
#else
-static inline void ntp_notify_cmos_timer(void) { }
+static inline void ntp_notify_cmos_timer(bool offset_set) { }
#endif
#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index e9c6f9d0e42c..6bcee4704059 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -453,6 +453,7 @@ static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
struct cpu_timer *ctmr = &timer->it.cpu;
struct posix_cputimer_base *base;
+ timer->it_active = 0;
if (!cpu_timer_dequeue(ctmr))
return;
@@ -559,6 +560,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
struct cpu_timer *ctmr = &timer->it.cpu;
u64 newexp = cpu_timer_getexpires(ctmr);
+ timer->it_active = 1;
if (!cpu_timer_enqueue(&base->tqhead, ctmr))
return;
@@ -584,12 +586,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
{
struct cpu_timer *ctmr = &timer->it.cpu;
- if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- /*
- * User don't want any signal.
- */
- cpu_timer_setexpires(ctmr, 0);
- } else if (unlikely(timer->sigq == NULL)) {
+ timer->it_active = 0;
+ if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
@@ -600,9 +598,9 @@ static void cpu_timer_fire(struct k_itimer *timer)
/*
* One-shot timer. Clear it as soon as it's fired.
*/
- posix_timer_event(timer, 0);
+ posix_timer_queue_signal(timer);
cpu_timer_setexpires(ctmr, 0);
- } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
+ } else if (posix_timer_queue_signal(timer)) {
/*
* The signal did not get queued because the signal
* was ignored, so we won't get any callback to
@@ -614,6 +612,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
}
}
+static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now);
+
/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
@@ -623,9 +623,10 @@ static void cpu_timer_fire(struct k_itimer *timer)
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
struct itimerspec64 *new, struct itimerspec64 *old)
{
+ bool sigev_none = timer->it_sigev_notify == SIGEV_NONE;
clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
- u64 old_expires, new_expires, old_incr, val;
struct cpu_timer *ctmr = &timer->it.cpu;
+ u64 old_expires, new_expires, now;
struct sighand_struct *sighand;
struct task_struct *p;
unsigned long flags;
@@ -662,10 +663,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
return -ESRCH;
}
- /*
- * Disarm any old timer after extracting its expiry time.
- */
- old_incr = timer->it_interval;
+ /* Retrieve the current expiry time before disarming the timer */
old_expires = cpu_timer_getexpires(ctmr);
if (unlikely(timer->it.cpu.firing)) {
@@ -673,157 +671,122 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
ret = TIMER_RETRY;
} else {
cpu_timer_dequeue(ctmr);
+ timer->it_active = 0;
}
/*
- * We need to sample the current value to convert the new
- * value from to relative and absolute, and to convert the
- * old value from absolute to relative. To set a process
- * timer, we need a sample to balance the thread expiry
- * times (in arm_timer). With an absolute time, we must
- * check if it's already passed. In short, we need a sample.
+ * Sample the current clock for saving the previous setting
+ * and for rearming the timer.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock))
- val = cpu_clock_sample(clkid, p);
+ now = cpu_clock_sample(clkid, p);
else
- val = cpu_clock_sample_group(clkid, p, true);
+ now = cpu_clock_sample_group(clkid, p, !sigev_none);
+ /* Retrieve the previous expiry value if requested. */
if (old) {
- if (old_expires == 0) {
- old->it_value.tv_sec = 0;
- old->it_value.tv_nsec = 0;
- } else {
- /*
- * Update the timer in case it has overrun already.
- * If it has, we'll report it as having overrun and
- * with the next reloaded timer already ticking,
- * though we are swallowing that pending
- * notification here to install the new setting.
- */
- u64 exp = bump_cpu_timer(timer, val);
-
- if (val < exp) {
- old_expires = exp - val;
- old->it_value = ns_to_timespec64(old_expires);
- } else {
- old->it_value.tv_nsec = 1;
- old->it_value.tv_sec = 0;
- }
- }
+ old->it_value = (struct timespec64){ };
+ if (old_expires)
+ __posix_cpu_timer_get(timer, old, now);
}
+ /* Retry if the timer expiry is running concurrently */
if (unlikely(ret)) {
- /*
- * We are colliding with the timer actually firing.
- * Punt after filling in the timer's old value, and
- * disable this firing since we are already reporting
- * it as an overrun (thanks to bump_cpu_timer above).
- */
unlock_task_sighand(p, &flags);
goto out;
}
- if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
- new_expires += val;
- }
+ /* Convert relative expiry time to absolute */
+ if (new_expires && !(timer_flags & TIMER_ABSTIME))
+ new_expires += now;
+
+ /* Set the new expiry time (might be 0) */
+ cpu_timer_setexpires(ctmr, new_expires);
/*
- * Install the new expiry time (or zero).
- * For a timer with no notification action, we don't actually
- * arm the timer (we'll just fake it for timer_gettime).
+ * Arm the timer if it is not disabled, the new expiry value has
+ * not yet expired and the timer requires signal delivery.
+ * SIGEV_NONE timers are never armed. In case the timer is not
+ * armed, enforce the reevaluation of the timer base so that the
+ * process wide cputime counter can be disabled eventually.
*/
- cpu_timer_setexpires(ctmr, new_expires);
- if (new_expires != 0 && val < new_expires) {
- arm_timer(timer, p);
+ if (likely(!sigev_none)) {
+ if (new_expires && now < new_expires)
+ arm_timer(timer, p);
+ else
+ trigger_base_recalc_expires(timer, p);
}
unlock_task_sighand(p, &flags);
+
+ posix_timer_set_common(timer, new);
+
/*
- * Install the new reload setting, and
- * set up the signal and overrun bookkeeping.
+ * If the new expiry time was already in the past the timer was not
+ * queued. Fire it immediately even if the thread never runs to
+ * accumulate more time on this clock.
*/
- timer->it_interval = timespec64_to_ktime(new->it_interval);
+ if (!sigev_none && new_expires && now >= new_expires)
+ cpu_timer_fire(timer);
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now)
+{
+ bool sigev_none = timer->it_sigev_notify == SIGEV_NONE;
+ u64 expires, iv = timer->it_interval;
/*
- * This acts as a modification timestamp for the timer,
- * so any automatic reload attempt will punt on seeing
- * that we have reset the timer manually.
+ * Make sure that interval timers are moved forward for the
+ * following cases:
+ * - SIGEV_NONE timers which are never armed
+ * - Timers which expired, but the signal has not yet been
+ * delivered
*/
- timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
- ~REQUEUE_PENDING;
- timer->it_overrun_last = 0;
- timer->it_overrun = -1;
-
- if (val >= new_expires) {
- if (new_expires != 0) {
- /*
- * The designated time already passed, so we notify
- * immediately, even if the thread never runs to
- * accumulate more time on this clock.
- */
- cpu_timer_fire(timer);
- }
+ if (iv && ((timer->it_requeue_pending & REQUEUE_PENDING) || sigev_none))
+ expires = bump_cpu_timer(timer, now);
+ else
+ expires = cpu_timer_getexpires(&timer->it.cpu);
+ /*
+ * Expired interval timers cannot have a remaining time <= 0.
+ * The kernel has to move them forward so that the next
+ * timer expiry is > @now.
+ */
+ if (now < expires) {
+ itp->it_value = ns_to_timespec64(expires - now);
+ } else {
/*
- * Make sure we don't keep around the process wide cputime
- * counter or the tick dependency if they are not necessary.
+ * A single shot SIGEV_NONE timer must return 0, when it is
+ * expired! Timers which have a real signal delivery mode
+ * must return a remaining time greater than 0 because the
+ * signal has not yet been delivered.
*/
- sighand = lock_task_sighand(p, &flags);
- if (!sighand)
- goto out;
-
- if (!cpu_timer_queued(ctmr))
- trigger_base_recalc_expires(timer, p);
-
- unlock_task_sighand(p, &flags);
+ if (!sigev_none)
+ itp->it_value.tv_nsec = 1;
}
- out:
- rcu_read_unlock();
- if (old)
- old->it_interval = ns_to_timespec64(old_incr);
-
- return ret;
}
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
- struct cpu_timer *ctmr = &timer->it.cpu;
- u64 now, expires = cpu_timer_getexpires(ctmr);
struct task_struct *p;
+ u64 now;
rcu_read_lock();
p = cpu_timer_task_rcu(timer);
- if (!p)
- goto out;
+ if (p && cpu_timer_getexpires(&timer->it.cpu)) {
+ itp->it_interval = ktime_to_timespec64(timer->it_interval);
- /*
- * Easy part: convert the reload time.
- */
- itp->it_interval = ktime_to_timespec64(timer->it_interval);
-
- if (!expires)
- goto out;
-
- /*
- * Sample the clock to take the difference with the expiry time.
- */
- if (CPUCLOCK_PERTHREAD(timer->it_clock))
- now = cpu_clock_sample(clkid, p);
- else
- now = cpu_clock_sample_group(clkid, p, false);
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ now = cpu_clock_sample(clkid, p);
+ else
+ now = cpu_clock_sample_group(clkid, p, false);
- if (now < expires) {
- itp->it_value = ns_to_timespec64(expires - now);
- } else {
- /*
- * The timer should have expired already, but the firing
- * hasn't taken place yet. Say it's just about to expire.
- */
- itp->it_value.tv_nsec = 1;
- itp->it_value.tv_sec = 0;
+ __posix_cpu_timer_get(timer, itp, now);
}
-out:
rcu_read_unlock();
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b924f0f096fa..4576aaed13b2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -277,10 +277,17 @@ void posixtimer_rearm(struct kernel_siginfo *info)
unlock_timer(timr, flags);
}
-int posix_timer_event(struct k_itimer *timr, int si_private)
+int posix_timer_queue_signal(struct k_itimer *timr)
{
+ int ret, si_private = 0;
enum pid_type type;
- int ret;
+
+ lockdep_assert_held(&timr->it_lock);
+
+ timr->it_active = 0;
+ if (timr->it_interval)
+ si_private = ++timr->it_requeue_pending;
+
/*
* FIXME: if ->sigq is queued we can race with
* dequeue_signal()->posixtimer_rearm().
@@ -309,19 +316,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
*/
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{
+ struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
enum hrtimer_restart ret = HRTIMER_NORESTART;
- struct k_itimer *timr;
unsigned long flags;
- int si_private = 0;
- timr = container_of(timer, struct k_itimer, it.real.timer);
spin_lock_irqsave(&timr->it_lock, flags);
- timr->it_active = 0;
- if (timr->it_interval != 0)
- si_private = ++timr->it_requeue_pending;
-
- if (posix_timer_event(timr, si_private)) {
+ if (posix_timer_queue_signal(timr)) {
/*
* The signal was not queued due to SIG_IGN. As a
* consequence the timer is not going to be rearmed from
@@ -338,14 +339,14 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
* change to the signal handling code.
*
* For now let timers with an interval less than a
- * jiffie expire every jiffie and recheck for a
+ * jiffy expire every jiffy and recheck for a
* valid signal handler.
*
* This avoids interrupt starvation in case of a
* very small interval, which would expire the
* timer immediately again.
*
- * Moving now ahead of time by one jiffie tricks
+ * Moving now ahead of time by one jiffy tricks
* hrtimer_forward() to expire the timer later,
* while it still maintains the overrun accuracy
* for the price of a slight inconsistency in the
@@ -515,7 +516,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
spin_lock_irq(&current->sighand->siglock);
/* This makes the timer valid in the hash table */
WRITE_ONCE(new_timer->it_signal, current->signal);
- list_add(&new_timer->list, &current->signal->posix_timers);
+ hlist_add_head(&new_timer->list, &current->signal->posix_timers);
spin_unlock_irq(&current->sighand->siglock);
/*
* After unlocking sighand::siglock @new_timer is subject to
@@ -856,6 +857,23 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
return lock_timer(timer_id, flags);
}
+/*
+ * Set up the new interval and reset the signal delivery data
+ */
+void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
+{
+ if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
+ timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
+ else
+ timer->it_interval = 0;
+
+ /* Prevent reloading in case there is a signal pending */
+ timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING;
+ /* Reset overrun accounting */
+ timer->it_overrun_last = 0;
+ timer->it_overrun = -1LL;
+}
+
/* Set a POSIX.1b interval timer. */
int common_timer_set(struct k_itimer *timr, int flags,
struct itimerspec64 *new_setting,
@@ -878,15 +896,12 @@ int common_timer_set(struct k_itimer *timr, int flags,
return TIMER_RETRY;
timr->it_active = 0;
- timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
- ~REQUEUE_PENDING;
- timr->it_overrun_last = 0;
+ posix_timer_set_common(timr, new_setting);
- /* Switch off the timer when it_value is zero */
+ /* Keep timer disarmed when it_value is zero */
if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
return 0;
- timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
expires = timespec64_to_ktime(new_setting->it_value);
if (flags & TIMER_ABSTIME)
expires = timens_ktime_to_host(timr->it_clock, expires);
@@ -904,7 +919,7 @@ static int do_timer_settime(timer_t timer_id, int tmr_flags,
const struct k_clock *kc;
struct k_itimer *timr;
unsigned long flags;
- int error = 0;
+ int error;
if (!timespec64_valid(&new_spec64->it_interval) ||
!timespec64_valid(&new_spec64->it_value))
@@ -918,6 +933,9 @@ retry:
if (!timr)
return -EINVAL;
+ if (old_spec64)
+ old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);
+
kc = timr->kclock;
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
@@ -1021,7 +1039,7 @@ retry_delete:
}
spin_lock(&current->sighand->siglock);
- list_del(&timer->list);
+ hlist_del(&timer->list);
spin_unlock(&current->sighand->siglock);
/*
* A concurrent lookup could check timer::it_signal lockless. It
@@ -1071,7 +1089,7 @@ retry_delete:
goto retry_delete;
}
- list_del(&timer->list);
+ hlist_del(&timer->list);
/*
* Setting timer::it_signal to NULL is technically not required
@@ -1092,22 +1110,19 @@ retry_delete:
*/
void exit_itimers(struct task_struct *tsk)
{
- struct list_head timers;
- struct k_itimer *tmr;
+ struct hlist_head timers;
- if (list_empty(&tsk->signal->posix_timers))
+ if (hlist_empty(&tsk->signal->posix_timers))
return;
/* Protect against concurrent read via /proc/$PID/timers */
spin_lock_irq(&tsk->sighand->siglock);
- list_replace_init(&tsk->signal->posix_timers, &timers);
+ hlist_move_list(&tsk->signal->posix_timers, &timers);
spin_unlock_irq(&tsk->sighand->siglock);
/* The timers are not longer accessible via tsk::signal */
- while (!list_empty(&timers)) {
- tmr = list_first_entry(&timers, struct k_itimer, list);
- itimer_delete(tmr);
- }
+ while (!hlist_empty(&timers))
+ itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
}
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index f32a2ebba9b8..4784ea65f685 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -36,10 +36,11 @@ extern const struct k_clock clock_process;
extern const struct k_clock clock_thread;
extern const struct k_clock alarm_clock;
-int posix_timer_event(struct k_itimer *timr, int si_private);
+int posix_timer_queue_signal(struct k_itimer *timr);
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
int common_timer_set(struct k_itimer *timr, int flags,
struct itimerspec64 *new_setting,
struct itimerspec64 *old_setting);
+void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting);
int common_timer_del(struct k_itimer *timer);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5391e4167d60..7e6f409bf311 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2553,6 +2553,7 @@ int do_adjtimex(struct __kernel_timex *txc)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct audit_ntp_data ad;
+ bool offset_set = false;
bool clock_set = false;
struct timespec64 ts;
unsigned long flags;
@@ -2575,6 +2576,7 @@ int do_adjtimex(struct __kernel_timex *txc)
if (ret)
return ret;
+ offset_set = delta.tv_sec != 0;
audit_tk_injoffset(delta);
}
@@ -2608,7 +2610,7 @@ int do_adjtimex(struct __kernel_timex *txc)
if (clock_set)
clock_was_set(CLOCK_SET_WALL);
- ntp_notify_cmos_timer();
+ ntp_notify_cmos_timer(offset_set);
return ret;
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 760bbeb1f331..0fc9d066a7be 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -365,7 +365,7 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
rem = j % HZ;
/*
- * If the target jiffie is just after a whole second (which can happen
+ * If the target jiffy is just after a whole second (which can happen
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
@@ -672,7 +672,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
* Set the next expiry time and kick the CPU so it
* can reevaluate the wheel:
*/
- base->next_expiry = bucket_expiry;
+ WRITE_ONCE(base->next_expiry, bucket_expiry);
base->timers_pending = true;
base->next_expiry_recalc = false;
trigger_dyntick_cpu(base, timer);
@@ -1561,6 +1561,8 @@ static inline void timer_base_unlock_expiry(struct timer_base *base)
* the waiter to acquire the lock and make progress.
*/
static void timer_sync_wait_running(struct timer_base *base)
+ __releases(&base->lock) __releases(&base->expiry_lock)
+ __acquires(&base->expiry_lock) __acquires(&base->lock)
{
if (atomic_read(&base->timer_waiters)) {
raw_spin_unlock_irq(&base->lock);
@@ -1898,7 +1900,7 @@ static int next_pending_bucket(struct timer_base *base, unsigned offset,
*
* Store next expiry time in base->next_expiry.
*/
-static void next_expiry_recalc(struct timer_base *base)
+static void timer_recalc_next_expiry(struct timer_base *base)
{
unsigned long clk, next, adj;
unsigned lvl, offset = 0;
@@ -1928,7 +1930,7 @@ static void next_expiry_recalc(struct timer_base *base)
* bits are zero, we look at the next level as is. If not we
* need to advance it by one because that's going to be the
* next expiring bucket in that level. base->clk is the next
- * expiring jiffie. So in case of:
+ * expiring jiffy. So in case of:
*
* LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
* 0 0 0 0 0 0
@@ -1964,7 +1966,7 @@ static void next_expiry_recalc(struct timer_base *base)
clk += adj;
}
- base->next_expiry = next;
+ WRITE_ONCE(base->next_expiry, next);
base->next_expiry_recalc = false;
base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
}
@@ -1993,7 +1995,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
return basem;
/*
- * Round up to the next jiffie. High resolution timers are
+ * Round up to the next jiffy. High resolution timers are
* off, so the hrtimers are expired in the tick and we need to
* make sure that this tick really expires the timer to avoid
* a ping pong of the nohz stop code.
@@ -2007,7 +2009,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
unsigned long basej)
{
if (base->next_expiry_recalc)
- next_expiry_recalc(base);
+ timer_recalc_next_expiry(base);
/*
* Move next_expiry for the empty base into the future to prevent an
@@ -2018,7 +2020,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
* easy comparable to find out which base holds the first pending timer.
*/
if (!base->timers_pending)
- base->next_expiry = basej + NEXT_TIMER_MAX_DELTA;
+ WRITE_ONCE(base->next_expiry, basej + NEXT_TIMER_MAX_DELTA);
return base->next_expiry;
}
@@ -2252,7 +2254,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
base_global, &tevt);
/*
- * If the next event is only one jiffie ahead there is no need to call
+ * If the next event is only one jiffy ahead there is no need to call
* timer migration hierarchy related functions. The value for the next
* global timer in @tevt struct equals then KTIME_MAX. This is also
* true, when the timer base is idle.
@@ -2411,7 +2413,7 @@ static inline void __run_timers(struct timer_base *base)
* jiffies to avoid endless requeuing to current jiffies.
*/
base->clk++;
- next_expiry_recalc(base);
+ timer_recalc_next_expiry(base);
while (levels--)
expire_timers(base, heads + levels);
@@ -2462,8 +2464,40 @@ static void run_local_timers(void)
hrtimer_run_queues();
for (int i = 0; i < NR_BASES; i++, base++) {
- /* Raise the softirq only if required. */
- if (time_after_eq(jiffies, base->next_expiry) ||
+ /*
+ * Raise the softirq only if required.
+ *
+ * timer_base::next_expiry can be written by a remote CPU while
+ * holding the lock. If this write happens at the same time than
+ * the lockless local read, sanity checker could complain about
+ * data corruption.
+ *
+ * There are two possible situations where
+ * timer_base::next_expiry is written by a remote CPU:
+ *
+ * 1. Remote CPU expires global timers of this CPU and updates
+ * timer_base::next_expiry of BASE_GLOBAL afterwards in
+ * next_timer_interrupt() or timer_recalc_next_expiry(). The
+ * worst outcome is a superfluous raise of the timer softirq
+ * when the not yet updated value is read.
+ *
+ * 2. A new first pinned timer is enqueued by a remote CPU
+ * and therefore timer_base::next_expiry of BASE_LOCAL is
+ * updated. When this update is missed, this isn't a
+ * problem, as an IPI is executed nevertheless when the CPU
+ * was idle before. When the CPU wasn't idle but the update
+ * is missed, then the timer would expire one jiffy late -
+ * bad luck.
+ *
+ * Those unlikely corner cases where the worst outcome is only a
+ * one jiffy delay or a superfluous raise of the softirq are
+ * not that expensive as doing the check always while holding
+ * the lock.
+ *
+ * Possible remote writers are using WRITE_ONCE(). Local reader
+ * uses therefore READ_ONCE().
+ */
+ if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
(i == BASE_DEF && tmigr_requires_handle_remote())) {
raise_softirq(TIMER_SOFTIRQ);
return;
@@ -2730,7 +2764,7 @@ void __init init_timers(void)
*/
void msleep(unsigned int msecs)
{
- unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+ unsigned long timeout = msecs_to_jiffies(msecs);
while (timeout)
timeout = schedule_timeout_uninterruptible(timeout);
@@ -2744,7 +2778,7 @@ EXPORT_SYMBOL(msleep);
*/
unsigned long msleep_interruptible(unsigned int msecs)
{
- unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+ unsigned long timeout = msecs_to_jiffies(msecs);
while (timeout && !signal_pending(current))
timeout = schedule_timeout_interruptible(timeout);