Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpu.c                     |    2
-rw-r--r--   kernel/fork.c                    |    2
-rw-r--r--   kernel/sched/syscalls.c          |    8
-rw-r--r--   kernel/signal.c                  |   34
-rw-r--r--   kernel/sys.c                     |    2
-rw-r--r--   kernel/time/alarmtimer.c         |    9
-rw-r--r--   kernel/time/clockevents.c        |    2
-rw-r--r--   kernel/time/hrtimer.c            |   22
-rw-r--r--   kernel/time/ntp.c                |   10
-rw-r--r--   kernel/time/ntp_internal.h       |    4
-rw-r--r--   kernel/time/posix-cpu-timers.c   |  207
-rw-r--r--   kernel/time/posix-timers.c       |   73
-rw-r--r--   kernel/time/posix-timers.h       |    3
-rw-r--r--   kernel/time/timekeeping.c        |    4
-rw-r--r--   kernel/time/timer.c              |   62
15 files changed, 231 insertions, 213 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0c9c5dfc8ddd..d293d52a3e00 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -330,7 +330,7 @@ static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state st
 			/* Poll for one millisecond */
 			arch_cpuhp_sync_state_poll();
 		} else {
-			usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
+			usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC);
 		}
 		sync = atomic_read(st);
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index cc760491f201..c1b343cba560 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1861,7 +1861,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	prev_cputime_init(&sig->prev_cputime);
 
 #ifdef CONFIG_POSIX_TIMERS
-	INIT_LIST_HEAD(&sig->posix_timers);
+	INIT_HLIST_HEAD(&sig->posix_timers);
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	sig->real_timer.function = it_real_fn;
 #endif
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index ae1b42775ef9..195d2f2834a9 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -406,6 +406,14 @@ static void __setscheduler_params(struct task_struct *p,
 	else if (fair_policy(policy))
 		p->static_prio = NICE_TO_PRIO(attr->sched_nice);
 
+	/* rt-policy tasks do not have a timerslack */
+	if (task_is_realtime(p)) {
+		p->timer_slack_ns = 0;
+	} else if (p->timer_slack_ns == 0) {
+		/* when switching back to non-rt policy, restore timerslack */
+		p->timer_slack_ns = p->default_timer_slack_ns;
+	}
+
 	/*
 	 * __sched_setscheduler() ensures attr->sched_priority == 0 when
 	 * !rt_policy. Always setting this ensures that things like
diff --git a/kernel/signal.c b/kernel/signal.c
index 60c737e423a1..6f3a5aa39b09 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -618,20 +618,18 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 }
 
 /*
- * Dequeue a signal and return the element to the caller, which is
- * expected to free it.
- *
- * All callers have to hold the siglock.
+ * Try to dequeue a signal. If a deliverable signal is found fill in the
+ * caller provided siginfo and return the signal number. Otherwise return
+ * 0.
  */
-int dequeue_signal(struct task_struct *tsk, sigset_t *mask,
-		   kernel_siginfo_t *info, enum pid_type *type)
+int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type)
 {
+	struct task_struct *tsk = current;
 	bool resched_timer = false;
 	int signr;
 
-	/* We only dequeue private signals from ourselves, we don't let
-	 * signalfd steal them
-	 */
+	lockdep_assert_held(&tsk->sighand->siglock);
+
 	*type = PIDTYPE_PID;
 	signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
 	if (!signr) {
@@ -1940,10 +1938,11 @@ struct sigqueue *sigqueue_alloc(void)
 
 void sigqueue_free(struct sigqueue *q)
 {
-	unsigned long flags;
 	spinlock_t *lock = &current->sighand->siglock;
+	unsigned long flags;
 
-	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+	if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
+		return;
 	/*
 	 * We must hold ->siglock while testing q->list
 	 * to serialize with collect_signal() or with
@@ -1971,7 +1970,10 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
 	unsigned long flags;
 	int ret, result;
 
-	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+	if (WARN_ON_ONCE(!(q->flags & SIGQUEUE_PREALLOC)))
+		return 0;
+	if (WARN_ON_ONCE(q->info.si_code != SI_TIMER))
+		return 0;
 
 	ret = -1;
 	rcu_read_lock();
@@ -2006,7 +2008,6 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
 		 * If an SI_TIMER entry is already queue just increment
 		 * the overrun count.
 		 */
-		BUG_ON(q->info.si_code != SI_TIMER);
 		q->info.si_overrun++;
 		result = TRACE_SIGNAL_ALREADY_PENDING;
 		goto out;
@@ -2793,8 +2794,7 @@ relock:
 		type = PIDTYPE_PID;
 		signr = dequeue_synchronous_signal(&ksig->info);
 		if (!signr)
-			signr = dequeue_signal(current, &current->blocked,
-					       &ksig->info, &type);
+			signr = dequeue_signal(&current->blocked, &ksig->info, &type);
 
 		if (!signr)
 			break; /* will return 0 */
@@ -3648,7 +3648,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
 	signotset(&mask);
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	sig = dequeue_signal(tsk, &mask, info, &type);
+	sig = dequeue_signal(&mask, info, &type);
 	if (!sig && timeout) {
 		/*
 		 * None ready, temporarily unblock those we're interested
@@ -3667,7 +3667,7 @@ static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
 		spin_lock_irq(&tsk->sighand->siglock);
 		__set_task_blocked(tsk, &tsk->real_blocked);
 		sigemptyset(&tsk->real_blocked);
-		sig = dequeue_signal(tsk, &mask, info, &type);
+		sig = dequeue_signal(&mask, info, &type);
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 3a2df1bd9f64..e3c4cffb520c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2557,6 +2557,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = current->timer_slack_ns;
 		break;
 	case PR_SET_TIMERSLACK:
+		if (task_is_realtime(current))
+			break;
 		if (arg2 <= 0)
 			current->timer_slack_ns =
					current->default_timer_slack_ns;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 5abfa4390673..8bf888641694 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -493,7 +493,7 @@ static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throt
 	 * promised in the context of posix_timer_fn() never
 	 * materialized, but someone should really work on it.
 	 *
-	 * To prevent DOS fake @now to be 1 jiffie out which keeps
+	 * To prevent DOS fake @now to be 1 jiffy out which keeps
 	 * the overrun accounting correct but creates an
 	 * inconsistency vs. timer_gettime(2).
 	 */
@@ -574,15 +574,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm,
 						it.alarm.alarmtimer);
 	enum alarmtimer_restart result = ALARMTIMER_NORESTART;
 	unsigned long flags;
-	int si_private = 0;
 
 	spin_lock_irqsave(&ptr->it_lock, flags);
 
-	ptr->it_active = 0;
-	if (ptr->it_interval)
-		si_private = ++ptr->it_requeue_pending;
-
-	if (posix_timer_event(ptr, si_private) && ptr->it_interval) {
+	if (posix_timer_queue_signal(ptr) && ptr->it_interval) {
 		/*
 		 * Handle ignored signals and rearm the timer. This will go
 		 * away once we handle ignored signals proper. Ensure that
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 60a6484831b1..78c7bd64d0dd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -190,7 +190,7 @@ int clockevents_tick_resume(struct clock_event_device *dev)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
-/* Limit min_delta to a jiffie */
+/* Limit min_delta to a jiffy */
 #define MIN_DELTA_LIMIT		(NSEC_PER_SEC / HZ)
 
 /**
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 836157e09e25..12eb40d6290e 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1177,7 +1177,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
 	/*
 	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
 	 * granular time values. For relative timers we add hrtimer_resolution
-	 * (i.e. one jiffie) to prevent short timeouts.
+	 * (i.e. one jiffy) to prevent short timeouts.
 	 */
 	timer->is_rel = mode & HRTIMER_MODE_REL;
 	if (timer->is_rel)
@@ -1351,11 +1351,13 @@ static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
 }
 
 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+	__acquires(&base->softirq_expiry_lock)
 {
 	spin_lock(&base->softirq_expiry_lock);
 }
 
 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+	__releases(&base->softirq_expiry_lock)
 {
 	spin_unlock(&base->softirq_expiry_lock);
 }
@@ -2072,14 +2074,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
 	int ret = 0;
-	u64 slack;
-
-	slack = current->timer_slack_ns;
-	if (rt_task(current))
-		slack = 0;
 
 	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
-	hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
+	hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
 		goto out;
@@ -2249,7 +2246,7 @@ void __init hrtimers_init(void)
 /**
  * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires:	timeout value (ktime_t)
- * @delta:	slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta:	slack in expires timeout (ktime_t)
  * @mode:	timer mode
  * @clock_id:	timer clock to be used
  */
@@ -2276,13 +2273,6 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	/*
-	 * Override any slack passed by the user if under
-	 * rt contraints.
-	 */
-	if (rt_task(current))
-		delta = 0;
-
 	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 	hrtimer_sleeper_start_expires(&t, mode);
@@ -2302,7 +2292,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
 /**
  * schedule_hrtimeout_range - sleep until timeout
  * @expires:	timeout value (ktime_t)
- * @delta:	slack in expires timeout (ktime_t) for SCHED_OTHER tasks
+ * @delta:	slack in expires timeout (ktime_t)
  * @mode:	timer mode
 *
 * Make the current task sleep until the given expiry time has
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8d2dd214ec68..802b336f4b8c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -660,9 +660,17 @@ rearm:
 	sched_sync_hw_clock(offset_nsec, res != 0);
 }
 
-void ntp_notify_cmos_timer(void)
+void ntp_notify_cmos_timer(bool offset_set)
 {
 	/*
+	 * If the time jumped (using ADJ_SETOFFSET) cancels sync timer,
+	 * which may have been running if the time was synchronized
+	 * prior to the ADJ_SETOFFSET call.
+	 */
+	if (offset_set)
+		hrtimer_cancel(&sync_hrtimer);
+
+	/*
 	 * When the work is currently executed but has not yet the timer
 	 * rearmed this queues the work immediately again. No big issue,
 	 * just a pointless work scheduled.
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 23d1b74c3065..5a633dce9057 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -14,9 +14,9 @@ extern int __do_adjtimex(struct __kernel_timex *txc,
 extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
 
 #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
-extern void ntp_notify_cmos_timer(void);
+extern void ntp_notify_cmos_timer(bool offset_set);
 #else
-static inline void ntp_notify_cmos_timer(void) { }
+static inline void ntp_notify_cmos_timer(bool offset_set) { }
 #endif
 
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index e9c6f9d0e42c..6bcee4704059 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -453,6 +453,7 @@ static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
 	struct cpu_timer *ctmr = &timer->it.cpu;
 	struct posix_cputimer_base *base;
 
+	timer->it_active = 0;
 	if (!cpu_timer_dequeue(ctmr))
 		return;
 
@@ -559,6 +560,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
 	struct cpu_timer *ctmr = &timer->it.cpu;
 	u64 newexp = cpu_timer_getexpires(ctmr);
 
+	timer->it_active = 1;
 	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
 		return;
 
@@ -584,12 +586,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
 {
 	struct cpu_timer *ctmr = &timer->it.cpu;
 
-	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
-		/*
-		 * User don't want any signal.
-		 */
-		cpu_timer_setexpires(ctmr, 0);
-	} else if (unlikely(timer->sigq == NULL)) {
+	timer->it_active = 0;
+	if (unlikely(timer->sigq == NULL)) {
 		/*
 		 * This a special case for clock_nanosleep,
 		 * not a normal timer from sys_timer_create.
@@ -600,9 +598,9 @@ static void cpu_timer_fire(struct k_itimer *timer)
 		/*
 		 * One-shot timer. Clear it as soon as it's fired.
 		 */
-		posix_timer_event(timer, 0);
+		posix_timer_queue_signal(timer);
 		cpu_timer_setexpires(ctmr, 0);
-	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
+	} else if (posix_timer_queue_signal(timer)) {
 		/*
 		 * The signal did not get queued because the signal
 		 * was ignored, so we won't get any callback to
@@ -614,6 +612,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
 	}
 }
 
+static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now);
+
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -623,9 +623,10 @@ static void cpu_timer_fire(struct k_itimer *timer)
 static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			       struct itimerspec64 *new, struct itimerspec64 *old)
 {
+	bool sigev_none = timer->it_sigev_notify == SIGEV_NONE;
 	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
-	u64 old_expires, new_expires, old_incr, val;
 	struct cpu_timer *ctmr = &timer->it.cpu;
+	u64 old_expires, new_expires, now;
 	struct sighand_struct *sighand;
 	struct task_struct *p;
 	unsigned long flags;
@@ -662,10 +663,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 		return -ESRCH;
 	}
 
-	/*
-	 * Disarm any old timer after extracting its expiry time.
-	 */
-	old_incr = timer->it_interval;
+	/* Retrieve the current expiry time before disarming the timer */
 	old_expires = cpu_timer_getexpires(ctmr);
 
 	if (unlikely(timer->it.cpu.firing)) {
@@ -673,157 +671,122 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 		ret = TIMER_RETRY;
 	} else {
 		cpu_timer_dequeue(ctmr);
+		timer->it_active = 0;
 	}
 
 	/*
-	 * We need to sample the current value to convert the new
-	 * value from to relative and absolute, and to convert the
-	 * old value from absolute to relative. To set a process
-	 * timer, we need a sample to balance the thread expiry
-	 * times (in arm_timer). With an absolute time, we must
-	 * check if it's already passed. In short, we need a sample.
+	 * Sample the current clock for saving the previous setting
+	 * and for rearming the timer.
 	 */
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
-		val = cpu_clock_sample(clkid, p);
+		now = cpu_clock_sample(clkid, p);
 	else
-		val = cpu_clock_sample_group(clkid, p, true);
+		now = cpu_clock_sample_group(clkid, p, !sigev_none);
 
+	/* Retrieve the previous expiry value if requested. */
 	if (old) {
-		if (old_expires == 0) {
-			old->it_value.tv_sec = 0;
-			old->it_value.tv_nsec = 0;
-		} else {
-			/*
-			 * Update the timer in case it has overrun already.
-			 * If it has, we'll report it as having overrun and
-			 * with the next reloaded timer already ticking,
-			 * though we are swallowing that pending
-			 * notification here to install the new setting.
-			 */
-			u64 exp = bump_cpu_timer(timer, val);
-
-			if (val < exp) {
-				old_expires = exp - val;
-				old->it_value = ns_to_timespec64(old_expires);
-			} else {
-				old->it_value.tv_nsec = 1;
-				old->it_value.tv_sec = 0;
-			}
-		}
+		old->it_value = (struct timespec64){ };
+		if (old_expires)
+			__posix_cpu_timer_get(timer, old, now);
 	}
 
+	/* Retry if the timer expiry is running concurrently */
 	if (unlikely(ret)) {
-		/*
-		 * We are colliding with the timer actually firing.
-		 * Punt after filling in the timer's old value, and
-		 * disable this firing since we are already reporting
-		 * it as an overrun (thanks to bump_cpu_timer above).
-		 */
 		unlock_task_sighand(p, &flags);
 		goto out;
 	}
 
-	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
-		new_expires += val;
-	}
+	/* Convert relative expiry time to absolute */
+	if (new_expires && !(timer_flags & TIMER_ABSTIME))
+		new_expires += now;
+
+	/* Set the new expiry time (might be 0) */
+	cpu_timer_setexpires(ctmr, new_expires);
 
 	/*
-	 * Install the new expiry time (or zero).
-	 * For a timer with no notification action, we don't actually
-	 * arm the timer (we'll just fake it for timer_gettime).
+	 * Arm the timer if it is not disabled, the new expiry value has
+	 * not yet expired and the timer requires signal delivery.
+	 * SIGEV_NONE timers are never armed. In case the timer is not
+	 * armed, enforce the reevaluation of the timer base so that the
+	 * process wide cputime counter can be disabled eventually.
 	 */
-	cpu_timer_setexpires(ctmr, new_expires);
-	if (new_expires != 0 && val < new_expires) {
-		arm_timer(timer, p);
+	if (likely(!sigev_none)) {
+		if (new_expires && now < new_expires)
+			arm_timer(timer, p);
+		else
+			trigger_base_recalc_expires(timer, p);
 	}
 
 	unlock_task_sighand(p, &flags);
+
+	posix_timer_set_common(timer, new);
+
 	/*
-	 * Install the new reload setting, and
-	 * set up the signal and overrun bookkeeping.
+	 * If the new expiry time was already in the past the timer was not
+	 * queued. Fire it immediately even if the thread never runs to
+	 * accumulate more time on this clock.
 	 */
-	timer->it_interval = timespec64_to_ktime(new->it_interval);
+	if (!sigev_none && new_expires && now >= new_expires)
+		cpu_timer_fire(timer);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static void __posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp, u64 now)
+{
+	bool sigev_none = timer->it_sigev_notify == SIGEV_NONE;
+	u64 expires, iv = timer->it_interval;
 
 	/*
-	 * This acts as a modification timestamp for the timer,
-	 * so any automatic reload attempt will punt on seeing
-	 * that we have reset the timer manually.
+	 * Make sure that interval timers are moved forward for the
+	 * following cases:
+	 *  - SIGEV_NONE timers which are never armed
+	 *  - Timers which expired, but the signal has not yet been
+	 *    delivered
 	 */
-	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
-		~REQUEUE_PENDING;
-	timer->it_overrun_last = 0;
-	timer->it_overrun = -1;
-
-	if (val >= new_expires) {
-		if (new_expires != 0) {
-			/*
-			 * The designated time already passed, so we notify
-			 * immediately, even if the thread never runs to
-			 * accumulate more time on this clock.
-			 */
-			cpu_timer_fire(timer);
-		}
+	if (iv && ((timer->it_requeue_pending & REQUEUE_PENDING) || sigev_none))
+		expires = bump_cpu_timer(timer, now);
+	else
+		expires = cpu_timer_getexpires(&timer->it.cpu);
 
+	/*
+	 * Expired interval timers cannot have a remaining time <= 0.
+	 * The kernel has to move them forward so that the next
+	 * timer expiry is > @now.
+	 */
+	if (now < expires) {
+		itp->it_value = ns_to_timespec64(expires - now);
+	} else {
 		/*
-		 * Make sure we don't keep around the process wide cputime
-		 * counter or the tick dependency if they are not necessary.
+		 * A single shot SIGEV_NONE timer must return 0, when it is
+		 * expired! Timers which have a real signal delivery mode
+		 * must return a remaining time greater than 0 because the
+		 * signal has not yet been delivered.
 		 */
-		sighand = lock_task_sighand(p, &flags);
-		if (!sighand)
-			goto out;
-
-		if (!cpu_timer_queued(ctmr))
-			trigger_base_recalc_expires(timer, p);
-
-		unlock_task_sighand(p, &flags);
+		if (!sigev_none)
+			itp->it_value.tv_nsec = 1;
 	}
- out:
-	rcu_read_unlock();
-	if (old)
-		old->it_interval = ns_to_timespec64(old_incr);
-
-	return ret;
 }
 
 static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
 {
 	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
-	struct cpu_timer *ctmr = &timer->it.cpu;
-	u64 now, expires = cpu_timer_getexpires(ctmr);
 	struct task_struct *p;
+	u64 now;
 
 	rcu_read_lock();
 	p = cpu_timer_task_rcu(timer);
-	if (!p)
-		goto out;
+	if (p && cpu_timer_getexpires(&timer->it.cpu)) {
+		itp->it_interval = ktime_to_timespec64(timer->it_interval);
 
-	/*
-	 * Easy part: convert the reload time.
-	 */
-	itp->it_interval = ktime_to_timespec64(timer->it_interval);
-
-	if (!expires)
-		goto out;
-
-	/*
-	 * Sample the clock to take the difference with the expiry time.
-	 */
-	if (CPUCLOCK_PERTHREAD(timer->it_clock))
-		now = cpu_clock_sample(clkid, p);
-	else
-		now = cpu_clock_sample_group(clkid, p, false);
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			now = cpu_clock_sample(clkid, p);
+		else
+			now = cpu_clock_sample_group(clkid, p, false);
 
-	if (now < expires) {
-		itp->it_value = ns_to_timespec64(expires - now);
-	} else {
-		/*
-		 * The timer should have expired already, but the firing
-		 * hasn't taken place yet. Say it's just about to expire.
-		 */
-		itp->it_value.tv_nsec = 1;
-		itp->it_value.tv_sec = 0;
+		__posix_cpu_timer_get(timer, itp, now);
 	}
-out:
 	rcu_read_unlock();
 }
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b924f0f096fa..4576aaed13b2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -277,10 +277,17 @@ void posixtimer_rearm(struct kernel_siginfo *info)
 	unlock_timer(timr, flags);
 }
 
-int posix_timer_event(struct k_itimer *timr, int si_private)
+int posix_timer_queue_signal(struct k_itimer *timr)
 {
+	int ret, si_private = 0;
 	enum pid_type type;
-	int ret;
+
+	lockdep_assert_held(&timr->it_lock);
+
+	timr->it_active = 0;
+	if (timr->it_interval)
+		si_private = ++timr->it_requeue_pending;
+
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->posixtimer_rearm().
@@ -309,19 +316,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
  */
 static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 {
+	struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
 	enum hrtimer_restart ret = HRTIMER_NORESTART;
-	struct k_itimer *timr;
 	unsigned long flags;
-	int si_private = 0;
 
-	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
 
-	timr->it_active = 0;
-	if (timr->it_interval != 0)
-		si_private = ++timr->it_requeue_pending;
-
-	if (posix_timer_event(timr, si_private)) {
+	if (posix_timer_queue_signal(timr)) {
 		/*
 		 * The signal was not queued due to SIG_IGN. As a
 		 * consequence the timer is not going to be rearmed from
@@ -338,14 +339,14 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 		 * change to the signal handling code.
 		 *
 		 * For now let timers with an interval less than a
-		 * jiffie expire every jiffie and recheck for a
+		 * jiffy expire every jiffy and recheck for a
 		 * valid signal handler.
 		 *
 		 * This avoids interrupt starvation in case of a
 		 * very small interval, which would expire the
 		 * timer immediately again.
 		 *
-		 * Moving now ahead of time by one jiffie tricks
+		 * Moving now ahead of time by one jiffy tricks
 		 * hrtimer_forward() to expire the timer later,
 		 * while it still maintains the overrun accuracy
 		 * for the price of a slight inconsistency in the
@@ -515,7 +516,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
 	spin_lock_irq(&current->sighand->siglock);
 	/* This makes the timer valid in the hash table */
 	WRITE_ONCE(new_timer->it_signal, current->signal);
-	list_add(&new_timer->list, &current->signal->posix_timers);
+	hlist_add_head(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
 	/*
 	 * After unlocking sighand::siglock @new_timer is subject to
@@ -856,6 +857,23 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
 	return lock_timer(timer_id, flags);
 }
 
+/*
+ * Set up the new interval and reset the signal delivery data
+ */
+void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
+{
+	if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
+		timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
+	else
+		timer->it_interval = 0;
+
+	/* Prevent reloading in case there is a signal pending */
+	timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING;
+	/* Reset overrun accounting */
+	timer->it_overrun_last = 0;
+	timer->it_overrun = -1LL;
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
 		     struct itimerspec64 *new_setting,
@@ -878,15 +896,12 @@ int common_timer_set(struct k_itimer *timr, int flags,
 		return TIMER_RETRY;
 
 	timr->it_active = 0;
-	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
-		~REQUEUE_PENDING;
-	timr->it_overrun_last = 0;
+	posix_timer_set_common(timr, new_setting);
 
-	/* Switch off the timer when it_value is zero */
+	/* Keep timer disarmed when it_value is zero */
 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
 		return 0;
 
-	timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
 	expires = timespec64_to_ktime(new_setting->it_value);
 	if (flags & TIMER_ABSTIME)
 		expires = timens_ktime_to_host(timr->it_clock, expires);
@@ -904,7 +919,7 @@ static int do_timer_settime(timer_t timer_id, int tmr_flags,
 	const struct k_clock *kc;
 	struct k_itimer *timr;
 	unsigned long flags;
-	int error = 0;
+	int error;
 
 	if (!timespec64_valid(&new_spec64->it_interval) ||
 	    !timespec64_valid(&new_spec64->it_value))
@@ -918,6 +933,9 @@ retry:
 	if (!timr)
 		return -EINVAL;
 
+	if (old_spec64)
+		old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);
+
 	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
@@ -1021,7 +1039,7 @@ retry_delete:
 	}
 
 	spin_lock(&current->sighand->siglock);
-	list_del(&timer->list);
+	hlist_del(&timer->list);
 	spin_unlock(&current->sighand->siglock);
 	/*
 	 * A concurrent lookup could check timer::it_signal lockless. It
@@ -1071,7 +1089,7 @@ retry_delete:
 		goto retry_delete;
 	}
 
-	list_del(&timer->list);
+	hlist_del(&timer->list);
 
 	/*
 	 * Setting timer::it_signal to NULL is technically not required
@@ -1092,22 +1110,19 @@ retry_delete:
  */
 void exit_itimers(struct task_struct *tsk)
 {
-	struct list_head timers;
-	struct k_itimer *tmr;
+	struct hlist_head timers;
 
-	if (list_empty(&tsk->signal->posix_timers))
+	if (hlist_empty(&tsk->signal->posix_timers))
 		return;
 
 	/* Protect against concurrent read via /proc/$PID/timers */
 	spin_lock_irq(&tsk->sighand->siglock);
-	list_replace_init(&tsk->signal->posix_timers, &timers);
+	hlist_move_list(&tsk->signal->posix_timers, &timers);
 	spin_unlock_irq(&tsk->sighand->siglock);
 
 	/* The timers are not longer accessible via tsk::signal */
-	while (!list_empty(&timers)) {
-		tmr = list_first_entry(&timers, struct k_itimer, list);
-		itimer_delete(tmr);
-	}
+	while (!hlist_empty(&timers))
+		itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
 }
 
 SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index f32a2ebba9b8..4784ea65f685 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -36,10 +36,11 @@ extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
 extern const struct k_clock alarm_clock;
 
-int posix_timer_event(struct k_itimer *timr, int si_private);
+int posix_timer_queue_signal(struct k_itimer *timr);
 
 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
 int common_timer_set(struct k_itimer *timr, int flags,
 		     struct itimerspec64 *new_setting,
 		     struct itimerspec64 *old_setting);
+void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting);
 int common_timer_del(struct k_itimer *timer);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5391e4167d60..7e6f409bf311 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2553,6 +2553,7 @@ int do_adjtimex(struct __kernel_timex *txc)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct audit_ntp_data ad;
+	bool offset_set = false;
 	bool clock_set = false;
 	struct timespec64 ts;
 	unsigned long flags;
@@ -2575,6 +2576,7 @@ int do_adjtimex(struct __kernel_timex *txc)
 		if (ret)
 			return ret;
 
+		offset_set = delta.tv_sec != 0;
 		audit_tk_injoffset(delta);
 	}
 
@@ -2608,7 +2610,7 @@ int do_adjtimex(struct __kernel_timex *txc)
 	if (clock_set)
 		clock_was_set(CLOCK_SET_WALL);
 
-	ntp_notify_cmos_timer();
+	ntp_notify_cmos_timer(offset_set);
 
 	return ret;
 }
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 760bbeb1f331..0fc9d066a7be 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -365,7 +365,7 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
 	rem = j % HZ;
 
 	/*
-	 * If the target jiffie is just after a whole second (which can happen
+	 * If the target jiffy is just after a whole second (which can happen
 	 * due to delays of the timer irq, long irq off times etc etc) then
 	 * we should round down to the whole second, not up. Use 1/4th second
 	 * as cutoff for this rounding as an extreme upper bound for this.
@@ -672,7 +672,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
 		 * Set the next expiry time and kick the CPU so it
 		 * can reevaluate the wheel:
 		 */
-		base->next_expiry = bucket_expiry;
+		WRITE_ONCE(base->next_expiry, bucket_expiry);
 		base->timers_pending = true;
 		base->next_expiry_recalc = false;
 		trigger_dyntick_cpu(base, timer);
@@ -1561,6 +1561,8 @@ static inline void timer_base_unlock_expiry(struct timer_base *base)
 * the waiter to acquire the lock and make progress.
 */
 static void timer_sync_wait_running(struct timer_base *base)
+	__releases(&base->lock) __releases(&base->expiry_lock)
+	__acquires(&base->expiry_lock) __acquires(&base->lock)
 {
 	if (atomic_read(&base->timer_waiters)) {
 		raw_spin_unlock_irq(&base->lock);
@@ -1898,7 +1900,7 @@ static int next_pending_bucket(struct timer_base *base, unsigned offset,
 *
 * Store next expiry time in base->next_expiry.
 */
-static void next_expiry_recalc(struct timer_base *base)
+static void timer_recalc_next_expiry(struct timer_base *base)
 {
 	unsigned long clk, next, adj;
 	unsigned lvl, offset = 0;
@@ -1928,7 +1930,7 @@ static void next_expiry_recalc(struct timer_base *base)
 		 * bits are zero, we look at the next level as is. If not we
 		 * need to advance it by one because that's going to be the
 		 * next expiring bucket in that level. base->clk is the next
-		 * expiring jiffie. So in case of:
+		 * expiring jiffy. So in case of:
 		 *
 		 * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
 		 *  0    0    0    0    0    0
@@ -1964,7 +1966,7 @@ static void next_expiry_recalc(struct timer_base *base)
 		clk += adj;
 	}
 
-	base->next_expiry = next;
+	WRITE_ONCE(base->next_expiry, next);
 	base->next_expiry_recalc = false;
 	base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
 }
@@ -1993,7 +1995,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 		return basem;
 
 	/*
-	 * Round up to the next jiffie. High resolution timers are
+	 * Round up to the next jiffy. High resolution timers are
 	 * off, so the hrtimers are expired in the tick and we need to
 	 * make sure that this tick really expires the timer to avoid
 	 * a ping pong of the nohz stop code.
@@ -2007,7 +2009,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
 					  unsigned long basej)
 {
 	if (base->next_expiry_recalc)
-		next_expiry_recalc(base);
+		timer_recalc_next_expiry(base);
 
 	/*
 	 * Move next_expiry for the empty base into the future to prevent an
@@ -2018,7 +2020,7 @@ static unsigned long next_timer_interrupt(struct timer_base *base,
 	 * easy comparable to find out which base holds the first pending timer.
 	 */
 	if (!base->timers_pending)
-		base->next_expiry = basej + NEXT_TIMER_MAX_DELTA;
+		WRITE_ONCE(base->next_expiry, basej + NEXT_TIMER_MAX_DELTA);
 
 	return base->next_expiry;
 }
@@ -2252,7 +2254,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
 						   base_global, &tevt);
 
 	/*
-	 * If the next event is only one jiffie ahead there is no need to call
+	 * If the next event is only one jiffy ahead there is no need to call
 	 * timer migration hierarchy related functions. The value for the next
 	 * global timer in @tevt struct equals then KTIME_MAX. This is also
 	 * true, when the timer base is idle.
@@ -2411,7 +2413,7 @@ static inline void __run_timers(struct timer_base *base)
 		 * jiffies to avoid endless requeuing to current jiffies.
 		 */
 		base->clk++;
-		next_expiry_recalc(base);
+		timer_recalc_next_expiry(base);
 
 		while (levels--)
 			expire_timers(base, heads + levels);
@@ -2462,8 +2464,40 @@ static void run_local_timers(void)
 	hrtimer_run_queues();
 
 	for (int i = 0; i < NR_BASES; i++, base++) {
-		/* Raise the softirq only if required. */
-		if (time_after_eq(jiffies, base->next_expiry) ||
+		/*
+		 * Raise the softirq only if required.
+		 *
+		 * timer_base::next_expiry can be written by a remote CPU while
+		 * holding the lock. If this write happens at the same time than
+		 * the lockless local read, sanity checker could complain about
+		 * data corruption.
+		 *
+		 * There are two possible situations where
+		 * timer_base::next_expiry is written by a remote CPU:
+		 *
+		 * 1. Remote CPU expires global timers of this CPU and updates
+		 * timer_base::next_expiry of BASE_GLOBAL afterwards in
+		 * next_timer_interrupt() or timer_recalc_next_expiry(). The
+		 * worst outcome is a superfluous raise of the timer softirq
+		 * when the not yet updated value is read.
+		 *
+		 * 2. A new first pinned timer is enqueued by a remote CPU
+		 * and therefore timer_base::next_expiry of BASE_LOCAL is
+		 * updated. When this update is missed, this isn't a
+		 * problem, as an IPI is executed nevertheless when the CPU
+		 * was idle before. When the CPU wasn't idle but the update
+		 * is missed, then the timer would expire one jiffy late -
+		 * bad luck.
+		 *
+		 * Those unlikely corner cases where the worst outcome is only a
+		 * one jiffy delay or a superfluous raise of the softirq are
+		 * not that expensive as doing the check always while holding
+		 * the lock.
+		 *
+		 * Possible remote writers are using WRITE_ONCE(). Local reader
+		 * uses therefore READ_ONCE().
+		 */
+		if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
 		    (i == BASE_DEF && tmigr_requires_handle_remote())) {
 			raise_softirq(TIMER_SOFTIRQ);
 			return;
@@ -2730,7 +2764,7 @@ void __init init_timers(void)
 */
 void msleep(unsigned int msecs)
 {
-	unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+	unsigned long timeout = msecs_to_jiffies(msecs);
 
 	while (timeout)
 		timeout = schedule_timeout_uninterruptible(timeout);
@@ -2744,7 +2778,7 @@ EXPORT_SYMBOL(msleep);
 */
 unsigned long msleep_interruptible(unsigned int msecs)
 {
-	unsigned long timeout = msecs_to_jiffies(msecs) + 1;
+	unsigned long timeout = msecs_to_jiffies(msecs);
 
 	while (timeout && !signal_pending(current))
 		timeout = schedule_timeout_interruptible(timeout);
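
Note: the kernel/sys.c and kernel/sched/syscalls.c hunks above change how a task's timer slack interacts with realtime scheduling policies: PR_SET_TIMERSLACK is ignored for realtime tasks, and switching a task to an rt policy forces its slack to 0 (the default is restored when it switches back to a fair policy). The userspace sketch below is illustrative only and not part of the patch; the slack value and the SCHED_FIFO priority are arbitrary, and the values printed depend on whether the running kernel includes this change.

/* Illustrative sketch (not part of the patch): exercise the timer slack
 * prctl() interface around a switch to SCHED_FIFO. sched_setscheduler()
 * needs CAP_SYS_NICE (or root) to succeed. */
#include <stdio.h>
#include <sched.h>
#include <sys/prctl.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 1 };

	/* PR_GET_TIMERSLACK returns the current slack in nanoseconds */
	printf("default slack: %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));

	/* Request a 100us slack for this task */
	prctl(PR_SET_TIMERSLACK, 100000UL, 0, 0, 0);
	printf("after PR_SET_TIMERSLACK: %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));

	/* With this series an rt task runs with zero slack and
	 * PR_SET_TIMERSLACK has no effect on it. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == 0)
		printf("as SCHED_FIFO: %d ns\n", prctl(PR_GET_TIMERSLACK, 0, 0, 0, 0));

	return 0;
}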
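Note: the kernel/time/timer.c hunks convert the timer_base::next_expiry accesses to WRITE_ONCE()/READ_ONCE() pairs because run_local_timers() now reads the field without holding the base lock, as the long comment added there explains. The sketch below is illustrative only and is not the kernel implementation: it models the same publish/consume pattern in portable C11, with a pthread mutex standing in for the base lock and relaxed atomics standing in for the kernel's WRITE_ONCE()/READ_ONCE() macros.

/* Illustrative sketch (not kernel code): a writer updates next_expiry under
 * a lock while a reader polls it locklessly. The relaxed atomic load/store
 * stands in for READ_ONCE()/WRITE_ONCE(): the reader may observe a slightly
 * stale value, which at worst causes a spurious or one-tick-late "raise the
 * softirq" decision, but never torn data. Build with: cc -pthread */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct base_sketch {
	pthread_mutex_t lock;			/* serializes writers */
	_Atomic unsigned long next_expiry;	/* read locklessly */
};

static struct base_sketch base = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.next_expiry = 1000,
};

/* "Remote CPU": moves next_expiry forward while holding the lock */
static void *remote_writer(void *unused)
{
	pthread_mutex_lock(&base.lock);
	atomic_store_explicit(&base.next_expiry, 1042, memory_order_relaxed);
	pthread_mutex_unlock(&base.lock);
	return NULL;
}

int main(void)
{
	unsigned long jiffies = 1042;	/* pretend current time in ticks */
	pthread_t t;

	pthread_create(&t, NULL, remote_writer, NULL);

	/* "Local CPU" in the tick path: lockless check, as in run_local_timers() */
	unsigned long nextevt = atomic_load_explicit(&base.next_expiry,
						     memory_order_relaxed);
	if (jiffies >= nextevt)
		printf("raise timer softirq (nextevt=%lu)\n", nextevt);
	else
		printf("nothing to do yet (nextevt=%lu)\n", nextevt);

	pthread_join(t, NULL);
	return 0;
}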