diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 22:35:15 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-17 22:35:15 +0300 |
commit | 7f2444d38f6bbfa12bc15e2533d8f9daa85ca02b (patch) | |
tree | 6506ec79036890edfd9797b001391a350b5ac10f /kernel/time/posix-cpu-timers.c | |
parent | c5f12fdb8bd873aa3ffdb79512e6bdac92b257b0 (diff) | |
parent | 77b4b5420422fc037d00b8f3f0e89b2262e4ae29 (diff) | |
download | linux-7f2444d38f6bbfa12bc15e2533d8f9daa85ca02b.tar.xz |
Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core timer updates from Thomas Gleixner:
"Timers and timekeeping updates:
- A large overhaul of the posix CPU timer code which is a preparation
for moving the CPU timer expiry out into task work so it can be
properly accounted on the task/process.
An update to the bogus permission checks will come later during the
merge window as feedback was not complete before heading of for
travel.
- Switch the timerqueue code to use cached rbtrees and get rid of the
homebrewn caching of the leftmost node.
- Consolidate hrtimer_init() + hrtimer_init_sleeper() calls into a
single function
- Implement the separation of hrtimers to be forced to expire in hard
interrupt context even when PREEMPT_RT is enabled and mark the
affected timers accordingly.
- Implement a mechanism for hrtimers and the timer wheel to protect
RT against priority inversion and live lock issues when a (hr)timer
which should be canceled is currently executing the callback.
Instead of infinitely spinning, the task which tries to cancel the
timer blocks on a per cpu base expiry lock which is held and
released by the (hr)timer expiry code.
- Enable the Hyper-V TSC page based sched_clock for Hyper-V guests
resulting in faster access to timekeeping functions.
- Updates to various clocksource/clockevent drivers and their device
tree bindings.
- The usual small improvements all over the place"
* 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
posix-cpu-timers: Fix permission check regression
posix-cpu-timers: Always clear head pointer on dequeue
hrtimer: Add a missing bracket and hide `migration_base' on !SMP
posix-cpu-timers: Make expiry_active check actually work correctly
posix-timers: Unbreak CONFIG_POSIX_TIMERS=n build
tick: Mark sched_timer to expire in hard interrupt context
hrtimer: Add kernel doc annotation for HRTIMER_MODE_HARD
x86/hyperv: Hide pv_ops access for CONFIG_PARAVIRT=n
posix-cpu-timers: Utilize timerqueue for storage
posix-cpu-timers: Move state tracking to struct posix_cputimers
posix-cpu-timers: Deduplicate rlimit handling
posix-cpu-timers: Remove pointless comparisons
posix-cpu-timers: Get rid of 64bit divisions
posix-cpu-timers: Consolidate timer expiry further
posix-cpu-timers: Get rid of zero checks
rlimit: Rewrite non-sensical RLIMIT_CPU comment
posix-cpu-timers: Respect INFINITY for hard RTTIME limit
posix-cpu-timers: Switch thread group sampling to array
posix-cpu-timers: Restructure expiry array
posix-cpu-timers: Remove cputime_expires
...
Diffstat (limited to 'kernel/time/posix-cpu-timers.c')
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 1010 |
1 files changed, 492 insertions, 518 deletions
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 0a426f4e3125..92a431981b1c 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -20,11 +20,20 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer); +void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) +{ + posix_cputimers_init(pct); + if (cpu_limit != RLIM_INFINITY) { + pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC; + pct->timers_active = true; + } +} + /* * Called after updating RLIMIT_CPU to run cpu timer and update - * tsk->signal->cputime_expires expiration cache if necessary. Needs - * siglock protection since other code may update expiration cache as - * well. + * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if + * necessary. Needs siglock protection since other code may update the + * expiration cache as well. */ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { @@ -35,46 +44,97 @@ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) spin_unlock_irq(&task->sighand->siglock); } -static int check_clock(const clockid_t which_clock) +/* + * Functions for validating access to tasks. + */ +static struct task_struct *lookup_task(const pid_t pid, bool thread, + bool gettime) { - int error = 0; struct task_struct *p; - const pid_t pid = CPUCLOCK_PID(which_clock); - - if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX) - return -EINVAL; - if (pid == 0) - return 0; + /* + * If the encoded PID is 0, then the timer is targeted at current + * or the process to which current belongs. + */ + if (!pid) + return thread ? current : current->group_leader; - rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? - same_thread_group(p, current) : has_group_leader_pid(p))) { - error = -EINVAL; + if (!p) + return p; + + if (thread) + return same_thread_group(p, current) ? p : NULL; + + if (gettime) { + /* + * For clock_gettime(PROCESS) the task does not need to be + * the actual group leader. tsk->sighand gives + * access to the group's clock. + * + * Timers need the group leader because they take a + * reference on it and store the task pointer until the + * timer is destroyed. + */ + return (p == current || thread_group_leader(p)) ? p : NULL; } + + /* + * For processes require that p is group leader. + */ + return has_group_leader_pid(p) ? p : NULL; +} + +static struct task_struct *__get_task_for_clock(const clockid_t clock, + bool getref, bool gettime) +{ + const bool thread = !!CPUCLOCK_PERTHREAD(clock); + const pid_t pid = CPUCLOCK_PID(clock); + struct task_struct *p; + + if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX) + return NULL; + + rcu_read_lock(); + p = lookup_task(pid, thread, gettime); + if (p && getref) + get_task_struct(p); rcu_read_unlock(); + return p; +} - return error; +static inline struct task_struct *get_task_for_clock(const clockid_t clock) +{ + return __get_task_for_clock(clock, true, false); +} + +static inline struct task_struct *get_task_for_clock_get(const clockid_t clock) +{ + return __get_task_for_clock(clock, true, true); +} + +static inline int validate_clock_permissions(const clockid_t clock) +{ + return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL; } /* * Update expiry time from increment, and increase overrun count, * given the current clock sample. */ -static void bump_cpu_timer(struct k_itimer *timer, u64 now) +static u64 bump_cpu_timer(struct k_itimer *timer, u64 now) { + u64 delta, incr, expires = timer->it.cpu.node.expires; int i; - u64 delta, incr; if (!timer->it_interval) - return; + return expires; - if (now < timer->it.cpu.expires) - return; + if (now < expires) + return expires; incr = timer->it_interval; - delta = now + incr - timer->it.cpu.expires; + delta = now + incr - expires; /* Don't use (incr*2 < delta), incr*2 might overflow. */ for (i = 0; incr < delta - incr; i++) @@ -84,48 +144,26 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now) if (delta < incr) continue; - timer->it.cpu.expires += incr; + timer->it.cpu.node.expires += incr; timer->it_overrun += 1LL << i; delta -= incr; } + return timer->it.cpu.node.expires; } -/** - * task_cputime_zero - Check a task_cputime struct for all zero fields. - * - * @cputime: The struct to compare. - * - * Checks @cputime to see if all fields are zero. Returns true if all fields - * are zero, false if any field is nonzero. - */ -static inline int task_cputime_zero(const struct task_cputime *cputime) +/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */ +static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct) { - if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) - return 1; - return 0; -} - -static inline u64 prof_ticks(struct task_struct *p) -{ - u64 utime, stime; - - task_cputime(p, &utime, &stime); - - return utime + stime; -} -static inline u64 virt_ticks(struct task_struct *p) -{ - u64 utime, stime; - - task_cputime(p, &utime, &stime); - - return utime; + return !(~pct->bases[CPUCLOCK_PROF].nextevt | + ~pct->bases[CPUCLOCK_VIRT].nextevt | + ~pct->bases[CPUCLOCK_SCHED].nextevt); } static int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { - int error = check_clock(which_clock); + int error = validate_clock_permissions(which_clock); + if (!error) { tp->tv_sec = 0; tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ); @@ -142,42 +180,66 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) } static int -posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) +posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp) { + int error = validate_clock_permissions(clock); + /* * You can never reset a CPU clock, but we check for other errors * in the call before failing with EPERM. */ - int error = check_clock(which_clock); - if (error == 0) { - error = -EPERM; - } - return error; + return error ? : -EPERM; } - /* - * Sample a per-thread clock for the given task. + * Sample a per-thread clock for the given task. clkid is validated. */ -static int cpu_clock_sample(const clockid_t which_clock, - struct task_struct *p, u64 *sample) +static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p) { - switch (CPUCLOCK_WHICH(which_clock)) { - default: - return -EINVAL; + u64 utime, stime; + + if (clkid == CPUCLOCK_SCHED) + return task_sched_runtime(p); + + task_cputime(p, &utime, &stime); + + switch (clkid) { case CPUCLOCK_PROF: - *sample = prof_ticks(p); - break; + return utime + stime; case CPUCLOCK_VIRT: - *sample = virt_ticks(p); - break; - case CPUCLOCK_SCHED: - *sample = task_sched_runtime(p); - break; + return utime; + default: + WARN_ON_ONCE(1); } return 0; } +static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime) +{ + samples[CPUCLOCK_PROF] = stime + utime; + samples[CPUCLOCK_VIRT] = utime; + samples[CPUCLOCK_SCHED] = rtime; +} + +static void task_sample_cputime(struct task_struct *p, u64 *samples) +{ + u64 stime, utime; + + task_cputime(p, &utime, &stime); + store_samples(samples, stime, utime, p->se.sum_exec_runtime); +} + +static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, + u64 *samples) +{ + u64 stime, utime, rtime; + + utime = atomic64_read(&at->utime); + stime = atomic64_read(&at->stime); + rtime = atomic64_read(&at->sum_exec_runtime); + store_samples(samples, stime, utime, rtime); +} + /* * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg * to avoid race conditions with concurrent updates to cputime. @@ -193,29 +255,56 @@ retry: } } -static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) +static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, + struct task_cputime *sum) { __update_gt_cputime(&cputime_atomic->utime, sum->utime); __update_gt_cputime(&cputime_atomic->stime, sum->stime); __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime); } -/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */ -static inline void sample_cputime_atomic(struct task_cputime *times, - struct task_cputime_atomic *atomic_times) +/** + * thread_group_sample_cputime - Sample cputime for a given task + * @tsk: Task for which cputime needs to be started + * @iimes: Storage for time samples + * + * Called from sys_getitimer() to calculate the expiry time of an active + * timer. That means group cputime accounting is already active. Called + * with task sighand lock held. + * + * Updates @times with an uptodate sample of the thread group cputimes. + */ +void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples) { - times->utime = atomic64_read(&atomic_times->utime); - times->stime = atomic64_read(&atomic_times->stime); - times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime); + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct posix_cputimers *pct = &tsk->signal->posix_cputimers; + + WARN_ON_ONCE(!pct->timers_active); + + proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) +/** + * thread_group_start_cputime - Start cputime and return a sample + * @tsk: Task for which cputime needs to be started + * @samples: Storage for time samples + * + * The thread group cputime accouting is avoided when there are no posix + * CPU timers armed. Before starting a timer it's required to check whether + * the time accounting is active. If not, a full update of the atomic + * accounting store needs to be done and the accounting enabled. + * + * Updates @times with an uptodate sample of the thread group cputimes. + */ +static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - struct task_cputime sum; + struct posix_cputimers *pct = &tsk->signal->posix_cputimers; /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) { + if (!READ_ONCE(pct->timers_active)) { + struct task_cputime sum; + /* * The POSIX timer interface allows for absolute time expiry * values through the TIMER_ABSTIME flag, therefore we have @@ -225,94 +314,69 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) update_gt_cputime(&cputimer->cputime_atomic, &sum); /* - * We're setting cputimer->running without a lock. Ensure - * this only gets written to in one operation. We set - * running after update_gt_cputime() as a small optimization, - * but barriers are not required because update_gt_cputime() + * We're setting timers_active without a lock. Ensure this + * only gets written to in one operation. We set it after + * update_gt_cputime() as a small optimization, but + * barriers are not required because update_gt_cputime() * can handle concurrent updates. */ - WRITE_ONCE(cputimer->running, true); + WRITE_ONCE(pct->timers_active, true); } - sample_cputime_atomic(times, &cputimer->cputime_atomic); + proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } -/* - * Sample a process (thread group) clock for the given group_leader task. - * Must be called with task sighand lock held for safe while_each_thread() - * traversal. - */ -static int cpu_clock_sample_group(const clockid_t which_clock, - struct task_struct *p, - u64 *sample) +static void __thread_group_cputime(struct task_struct *tsk, u64 *samples) { - struct task_cputime cputime; + struct task_cputime ct; - switch (CPUCLOCK_WHICH(which_clock)) { - default: - return -EINVAL; - case CPUCLOCK_PROF: - thread_group_cputime(p, &cputime); - *sample = cputime.utime + cputime.stime; - break; - case CPUCLOCK_VIRT: - thread_group_cputime(p, &cputime); - *sample = cputime.utime; - break; - case CPUCLOCK_SCHED: - thread_group_cputime(p, &cputime); - *sample = cputime.sum_exec_runtime; - break; - } - return 0; + thread_group_cputime(tsk, &ct); + store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime); } -static int posix_cpu_clock_get_task(struct task_struct *tsk, - const clockid_t which_clock, - struct timespec64 *tp) +/* + * Sample a process (thread group) clock for the given task clkid. If the + * group's cputime accounting is already enabled, read the atomic + * store. Otherwise a full update is required. Task's sighand lock must be + * held to protect the task traversal on a full update. clkid is already + * validated. + */ +static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p, + bool start) { - int err = -EINVAL; - u64 rtn; + struct thread_group_cputimer *cputimer = &p->signal->cputimer; + struct posix_cputimers *pct = &p->signal->posix_cputimers; + u64 samples[CPUCLOCK_MAX]; - if (CPUCLOCK_PERTHREAD(which_clock)) { - if (same_thread_group(tsk, current)) - err = cpu_clock_sample(which_clock, tsk, &rtn); + if (!READ_ONCE(pct->timers_active)) { + if (start) + thread_group_start_cputime(p, samples); + else + __thread_group_cputime(p, samples); } else { - if (tsk == current || thread_group_leader(tsk)) - err = cpu_clock_sample_group(which_clock, tsk, &rtn); + proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples); } - if (!err) - *tp = ns_to_timespec64(rtn); - - return err; + return samples[clkid]; } - -static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) +static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp) { - const pid_t pid = CPUCLOCK_PID(which_clock); - int err = -EINVAL; + const clockid_t clkid = CPUCLOCK_WHICH(clock); + struct task_struct *tsk; + u64 t; - if (pid == 0) { - /* - * Special case constant value for our own clocks. - * We don't have to do any lookup to find ourselves. - */ - err = posix_cpu_clock_get_task(current, which_clock, tp); - } else { - /* - * Find the given PID, and validate that the caller - * should be able to see it. - */ - struct task_struct *p; - rcu_read_lock(); - p = find_task_by_vpid(pid); - if (p) - err = posix_cpu_clock_get_task(p, which_clock, tp); - rcu_read_unlock(); - } + tsk = get_task_for_clock_get(clock); + if (!tsk) + return -EINVAL; - return err; + if (CPUCLOCK_PERTHREAD(clock)) + t = cpu_clock_sample(clkid, tsk); + else + t = cpu_clock_sample_group(clkid, tsk, false); + put_task_struct(tsk); + + *tp = ns_to_timespec64(t); + return 0; } /* @@ -322,44 +386,15 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *t */ static int posix_cpu_timer_create(struct k_itimer *new_timer) { - int ret = 0; - const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); - struct task_struct *p; + struct task_struct *p = get_task_for_clock(new_timer->it_clock); - if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) + if (!p) return -EINVAL; new_timer->kclock = &clock_posix_cpu; - - INIT_LIST_HEAD(&new_timer->it.cpu.entry); - - rcu_read_lock(); - if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { - if (pid == 0) { - p = current; - } else { - p = find_task_by_vpid(pid); - if (p && !same_thread_group(p, current)) - p = NULL; - } - } else { - if (pid == 0) { - p = current->group_leader; - } else { - p = find_task_by_vpid(pid); - if (p && !has_group_leader_pid(p)) - p = NULL; - } - } + timerqueue_init(&new_timer->it.cpu.node); new_timer->it.cpu.task = p; - if (p) { - get_task_struct(p); - } else { - ret = -EINVAL; - } - rcu_read_unlock(); - - return ret; + return 0; } /* @@ -370,12 +405,14 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) */ static int posix_cpu_timer_del(struct k_itimer *timer) { - int ret = 0; - unsigned long flags; + struct cpu_timer *ctmr = &timer->it.cpu; + struct task_struct *p = ctmr->task; struct sighand_struct *sighand; - struct task_struct *p = timer->it.cpu.task; + unsigned long flags; + int ret = 0; - WARN_ON_ONCE(p == NULL); + if (WARN_ON_ONCE(!p)) + return -EINVAL; /* * Protect against sighand release/switch in exit/exec and process/ @@ -384,15 +421,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) { /* - * We raced with the reaping of the task. - * The deletion should have cleared us off the list. + * This raced with the reaping of the task. The exit cleanup + * should have removed this timer from the timer queue. */ - WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry)); + WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node)); } else { if (timer->it.cpu.firing) ret = TIMER_RETRY; else - list_del(&timer->it.cpu.entry); + cpu_timer_dequeue(ctmr); unlock_task_sighand(p, &flags); } @@ -403,25 +440,30 @@ static int posix_cpu_timer_del(struct k_itimer *timer) return ret; } -static void cleanup_timers_list(struct list_head *head) +static void cleanup_timerqueue(struct timerqueue_head *head) { - struct cpu_timer_list *timer, *next; + struct timerqueue_node *node; + struct cpu_timer *ctmr; - list_for_each_entry_safe(timer, next, head, entry) - list_del_init(&timer->entry); + while ((node = timerqueue_getnext(head))) { + timerqueue_del(head, node); + ctmr = container_of(node, struct cpu_timer, node); + ctmr->head = NULL; + } } /* - * Clean out CPU timers still ticking when a thread exited. The task - * pointer is cleared, and the expiry time is replaced with the residual - * time for later timer_gettime calls to return. + * Clean out CPU timers which are still armed when a thread exits. The + * timers are only removed from the list. No other updates are done. The + * corresponding posix timers are still accessible, but cannot be rearmed. + * * This must be called with the siglock held. */ -static void cleanup_timers(struct list_head *head) +static void cleanup_timers(struct posix_cputimers *pct) { - cleanup_timers_list(head); - cleanup_timers_list(++head); - cleanup_timers_list(++head); + cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead); + cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead); + cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead); } /* @@ -431,16 +473,11 @@ static void cleanup_timers(struct list_head *head) */ void posix_cpu_timers_exit(struct task_struct *tsk) { - cleanup_timers(tsk->cpu_timers); + cleanup_timers(&tsk->posix_cputimers); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - cleanup_timers(tsk->signal->cpu_timers); -} - -static inline int expires_gt(u64 expires, u64 new_exp) -{ - return expires == 0 || expires > new_exp; + cleanup_timers(&tsk->signal->posix_cputimers); } /* @@ -449,58 +486,33 @@ static inline int expires_gt(u64 expires, u64 new_exp) */ static void arm_timer(struct k_itimer *timer) { - struct task_struct *p = timer->it.cpu.task; - struct list_head *head, *listpos; - struct task_cputime *cputime_expires; - struct cpu_timer_list *const nt = &timer->it.cpu; - struct cpu_timer_list *next; - - if (CPUCLOCK_PERTHREAD(timer->it_clock)) { - head = p->cpu_timers; - cputime_expires = &p->cputime_expires; - } else { - head = p->signal->cpu_timers; - cputime_expires = &p->signal->cputime_expires; - } - head += CPUCLOCK_WHICH(timer->it_clock); - - listpos = head; - list_for_each_entry(next, head, entry) { - if (nt->expires < next->expires) - break; - listpos = &next->entry; - } - list_add(&nt->entry, listpos); - - if (listpos == head) { - u64 exp = nt->expires; + int clkidx = CPUCLOCK_WHICH(timer->it_clock); + struct cpu_timer *ctmr = &timer->it.cpu; + u64 newexp = cpu_timer_getexpires(ctmr); + struct task_struct *p = ctmr->task; + struct posix_cputimer_base *base; + + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + base = p->posix_cputimers.bases + clkidx; + else + base = p->signal->posix_cputimers.bases + clkidx; + + if (!cpu_timer_enqueue(&base->tqhead, ctmr)) + return; - /* - * We are the new earliest-expiring POSIX 1.b timer, hence - * need to update expiration cache. Take into account that - * for process timers we share expiration cache with itimers - * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. - */ + /* + * We are the new earliest-expiring POSIX 1.b timer, hence + * need to update expiration cache. Take into account that + * for process timers we share expiration cache with itimers + * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME. + */ + if (newexp < base->nextevt) + base->nextevt = newexp; - switch (CPUCLOCK_WHICH(timer->it_clock)) { - case CPUCLOCK_PROF: - if (expires_gt(cputime_expires->prof_exp, exp)) - cputime_expires->prof_exp = exp; - break; - case CPUCLOCK_VIRT: - if (expires_gt(cputime_expires->virt_exp, exp)) - cputime_expires->virt_exp = exp; - break; - case CPUCLOCK_SCHED: - if (expires_gt(cputime_expires->sched_exp, exp)) - cputime_expires->sched_exp = exp; - break; - } - if (CPUCLOCK_PERTHREAD(timer->it_clock)) - tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); - else - tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); - } + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER); + else + tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER); } /* @@ -508,24 +520,26 @@ static void arm_timer(struct k_itimer *timer) */ static void cpu_timer_fire(struct k_itimer *timer) { + struct cpu_timer *ctmr = &timer->it.cpu; + if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { /* * User don't want any signal. */ - timer->it.cpu.expires = 0; + cpu_timer_setexpires(ctmr, 0); } else if (unlikely(timer->sigq == NULL)) { /* * This a special case for clock_nanosleep, * not a normal timer from sys_timer_create. */ wake_up_process(timer->it_process); - timer->it.cpu.expires = 0; + cpu_timer_setexpires(ctmr, 0); } else if (!timer->it_interval) { /* * One-shot timer. Clear it as soon as it's fired. */ posix_timer_event(timer, 0); - timer->it.cpu.expires = 0; + cpu_timer_setexpires(ctmr, 0); } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { /* * The signal did not get queued because the signal @@ -539,33 +553,6 @@ static void cpu_timer_fire(struct k_itimer *timer) } /* - * Sample a process (thread group) timer for the given group_leader task. - * Must be called with task sighand lock held for safe while_each_thread() - * traversal. - */ -static int cpu_timer_sample_group(const clockid_t which_clock, - struct task_struct *p, u64 *sample) -{ - struct task_cputime cputime; - - thread_group_cputimer(p, &cputime); - switch (CPUCLOCK_WHICH(which_clock)) { - default: - return -EINVAL; - case CPUCLOCK_PROF: - *sample = cputime.utime + cputime.stime; - break; - case CPUCLOCK_VIRT: - *sample = cputime.utime; - break; - case CPUCLOCK_SCHED: - *sample = cputime.sum_exec_runtime; - break; - } - return 0; -} - -/* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. * If we return TIMER_RETRY, it's necessary to release the timer's lock @@ -574,13 +561,16 @@ static int cpu_timer_sample_group(const clockid_t which_clock, static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, struct itimerspec64 *new, struct itimerspec64 *old) { - unsigned long flags; - struct sighand_struct *sighand; - struct task_struct *p = timer->it.cpu.task; + clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); u64 old_expires, new_expires, old_incr, val; - int ret; + struct cpu_timer *ctmr = &timer->it.cpu; + struct task_struct *p = ctmr->task; + struct sighand_struct *sighand; + unsigned long flags; + int ret = 0; - WARN_ON_ONCE(p == NULL); + if (WARN_ON_ONCE(!p)) + return -EINVAL; /* * Use the to_ktime conversion because that clamps the maximum @@ -597,22 +587,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * If p has just been reaped, we can no * longer get any information about it at all. */ - if (unlikely(sighand == NULL)) { + if (unlikely(sighand == NULL)) return -ESRCH; - } /* * Disarm any old timer after extracting its expiry time. */ - - ret = 0; old_incr = timer->it_interval; - old_expires = timer->it.cpu.expires; + old_expires = cpu_timer_getexpires(ctmr); + if (unlikely(timer->it.cpu.firing)) { timer->it.cpu.firing = -1; ret = TIMER_RETRY; - } else - list_del_init(&timer->it.cpu.entry); + } else { + cpu_timer_dequeue(ctmr); + } /* * We need to sample the current value to convert the new @@ -622,11 +611,10 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * times (in arm_timer). With an absolute time, we must * check if it's already passed. In short, we need a sample. */ - if (CPUCLOCK_PERTHREAD(timer->it_clock)) { - cpu_clock_sample(timer->it_clock, p, &val); - } else { - cpu_timer_sample_group(timer->it_clock, p, &val); - } + if (CPUCLOCK_PERTHREAD(timer->it_clock)) + val = cpu_clock_sample(clkid, p); + else + val = cpu_clock_sample_group(clkid, p, true); if (old) { if (old_expires == 0) { @@ -634,18 +622,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, old->it_value.tv_nsec = 0; } else { /* - * Update the timer in case it has - * overrun already. If it has, - * we'll report it as having overrun - * and with the next reloaded timer - * already ticking, though we are - * swallowing that pending - * notification here to install the - * new setting. + * Update the timer in case it has overrun already. + * If it has, we'll report it as having overrun and + * with the next reloaded timer already ticking, + * though we are swallowing that pending + * notification here to install the new setting. */ - bump_cpu_timer(timer, val); - if (val < timer->it.cpu.expires) { - old_expires = timer->it.cpu.expires - val; + u64 exp = bump_cpu_timer(timer, val); + + if (val < exp) { + old_expires = exp - val; old->it_value = ns_to_timespec64(old_expires); } else { old->it_value.tv_nsec = 1; @@ -674,7 +660,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * For a timer with no notification action, we don't actually * arm the timer (we'll just fake it for timer_gettime). */ - timer->it.cpu.expires = new_expires; + cpu_timer_setexpires(ctmr, new_expires); if (new_expires != 0 && val < new_expires) { arm_timer(timer); } @@ -715,24 +701,27 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) { - u64 now; - struct task_struct *p = timer->it.cpu.task; + clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); + struct cpu_timer *ctmr = &timer->it.cpu; + u64 now, expires = cpu_timer_getexpires(ctmr); + struct task_struct *p = ctmr->task; - WARN_ON_ONCE(p == NULL); + if (WARN_ON_ONCE(!p)) + return; /* * Easy part: convert the reload time. */ itp->it_interval = ktime_to_timespec64(timer->it_interval); - if (!timer->it.cpu.expires) + if (!expires) return; /* * Sample the clock to take the difference with the expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) { - cpu_clock_sample(timer->it_clock, p, &now); + now = cpu_clock_sample(clkid, p); } else { struct sighand_struct *sighand; unsigned long flags; @@ -747,18 +736,18 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp /* * The process has been reaped. * We can't even collect a sample any more. - * Call the timer disarmed, nothing else to do. + * Disarm the timer, nothing else to do. */ - timer->it.cpu.expires = 0; + cpu_timer_setexpires(ctmr, 0); return; } else { - cpu_timer_sample_group(timer->it_clock, p, &now); + now = cpu_clock_sample_group(clkid, p, false); unlock_task_sighand(p, &flags); } } - if (now < timer->it.cpu.expires) { - itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now); + if (now < expires) { + itp->it_value = ns_to_timespec64(expires - now); } else { /* * The timer should have expired already, but the firing @@ -769,26 +758,42 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp } } -static unsigned long long -check_timers_list(struct list_head *timers, - struct list_head *firing, - unsigned long long curr) -{ - int maxfire = 20; +#define MAX_COLLECTED 20 - while (!list_empty(timers)) { - struct cpu_timer_list *t; +static u64 collect_timerqueue(struct timerqueue_head *head, + struct list_head *firing, u64 now) +{ + struct timerqueue_node *next; + int i = 0; + + while ((next = timerqueue_getnext(head))) { + struct cpu_timer *ctmr; + u64 expires; + + ctmr = container_of(next, struct cpu_timer, node); + expires = cpu_timer_getexpires(ctmr); + /* Limit the number of timers to expire at once */ + if (++i == MAX_COLLECTED || now < expires) + return expires; + + ctmr->firing = 1; + cpu_timer_dequeue(ctmr); + list_add_tail(&ctmr->elist, firing); + } - t = list_first_entry(timers, struct cpu_timer_list, entry); + return U64_MAX; +} - if (!--maxfire || curr < t->expires) - return t->expires; +static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, + struct list_head *firing) +{ + struct posix_cputimer_base *base = pct->bases; + int i; - t->firing = 1; - list_move_tail(&t->entry, firing); + for (i = 0; i < CPUCLOCK_MAX; i++, base++) { + base->nextevt = collect_timerqueue(&base->tqhead, firing, + samples[i]); } - - return 0; } static inline void check_dl_overrun(struct task_struct *tsk) @@ -799,6 +804,20 @@ static inline void check_dl_overrun(struct task_struct *tsk) } } +static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) +{ + if (time < limit) + return false; + + if (print_fatal_signals) { + pr_info("%s Watchdog Timeout (%s): %s[%d]\n", + rt ? "RT" : "CPU", hard ? "hard" : "soft", + current->comm, task_pid_nr(current)); + } + __group_send_sig_info(signo, SEND_SIG_PRIV, current); + return true; +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -807,76 +826,50 @@ static inline void check_dl_overrun(struct task_struct *tsk) static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { - struct list_head *timers = tsk->cpu_timers; - struct task_cputime *tsk_expires = &tsk->cputime_expires; - u64 expires; + struct posix_cputimers *pct = &tsk->posix_cputimers; + u64 samples[CPUCLOCK_MAX]; unsigned long soft; if (dl_task(tsk)) check_dl_overrun(tsk); - /* - * If cputime_expires is zero, then there are no active - * per thread CPU timers. - */ - if (task_cputime_zero(&tsk->cputime_expires)) + if (expiry_cache_is_inactive(pct)) return; - expires = check_timers_list(timers, firing, prof_ticks(tsk)); - tsk_expires->prof_exp = expires; - - expires = check_timers_list(++timers, firing, virt_ticks(tsk)); - tsk_expires->virt_exp = expires; - - tsk_expires->sched_exp = check_timers_list(++timers, firing, - tsk->se.sum_exec_runtime); + task_sample_cputime(tsk, samples); + collect_posix_cputimers(pct, samples, firing); /* * Check for the special case thread timers. */ soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { + /* Task RT timeout is accounted in jiffies. RTTIME is usec */ + unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); + /* At the hard limit, send SIGKILL. No further action. */ if (hard != RLIM_INFINITY && - tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { - /* - * At the hard limit, we just die. - * No need to calculate anything else now. - */ - if (print_fatal_signals) { - pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); - } - __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + check_rlimit(rttime, hard, SIGKILL, true, true)) return; - } - if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { - /* - * At the soft limit, send a SIGXCPU every second. - */ - if (soft < hard) { - soft += USEC_PER_SEC; - tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = - soft; - } - if (print_fatal_signals) { - pr_info("RT Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); - } - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + + /* At the soft limit, send a SIGXCPU every second */ + if (check_rlimit(rttime, soft, SIGXCPU, true, false)) { + soft += USEC_PER_SEC; + tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft; } } - if (task_cputime_zero(tsk_expires)) + + if (expiry_cache_is_inactive(pct)) tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); } static inline void stop_process_timers(struct signal_struct *sig) { - struct thread_group_cputimer *cputimer = &sig->cputimer; + struct posix_cputimers *pct = &sig->posix_cputimers; - /* Turn off cputimer->running. This is done without locking. */ - WRITE_ONCE(cputimer->running, false); + /* Turn off the active flag. This is done without locking. */ + WRITE_ONCE(pct->timers_active, false); tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); } @@ -898,7 +891,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } - if (it->expires && (!*expires || it->expires < *expires)) + if (it->expires && it->expires < *expires) *expires = it->expires; } @@ -911,87 +904,69 @@ static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { struct signal_struct *const sig = tsk->signal; - u64 utime, ptime, virt_expires, prof_expires; - u64 sum_sched_runtime, sched_expires; - struct list_head *timers = sig->cpu_timers; - struct task_cputime cputime; + struct posix_cputimers *pct = &sig->posix_cputimers; + u64 samples[CPUCLOCK_MAX]; unsigned long soft; /* - * If cputimer is not running, then there are no active - * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). + * If there are no active process wide timers (POSIX 1.b, itimers, + * RLIMIT_CPU) nothing to check. Also skip the process wide timer + * processing when there is already another task handling them. */ - if (!READ_ONCE(tsk->signal->cputimer.running)) + if (!READ_ONCE(pct->timers_active) || pct->expiry_active) return; - /* + /* * Signify that a thread is checking for process timers. * Write access to this field is protected by the sighand lock. */ - sig->cputimer.checking_timer = true; + pct->expiry_active = true; /* - * Collect the current process totals. + * Collect the current process totals. Group accounting is active + * so the sample can be taken directly. */ - thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - ptime = utime + cputime.stime; - sum_sched_runtime = cputime.sum_exec_runtime; - - prof_expires = check_timers_list(timers, firing, ptime); - virt_expires = check_timers_list(++timers, firing, utime); - sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); + proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples); + collect_posix_cputimers(pct, samples, firing); /* * Check for the special case process timers. */ - check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, - SIGPROF); - check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, - SIGVTALRM); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], + &pct->bases[CPUCLOCK_PROF].nextevt, + samples[CPUCLOCK_PROF], SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], + &pct->bases[CPUCLOCK_VIRT].nextevt, + samples[CPUCLOCK_VIRT], SIGVTALRM); + soft = task_rlimit(tsk, RLIMIT_CPU); if (soft != RLIM_INFINITY) { - unsigned long psecs = div_u64(ptime, NSEC_PER_SEC); + /* RLIMIT_CPU is in seconds. Samples are nanoseconds */ unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); - u64 x; - if (psecs >= hard) { - /* - * At the hard limit, we just die. - * No need to calculate anything else now. - */ - if (print_fatal_signals) { - pr_info("RT Watchdog Timeout (hard): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); - } - __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); + u64 ptime = samples[CPUCLOCK_PROF]; + u64 softns = (u64)soft * NSEC_PER_SEC; + u64 hardns = (u64)hard * NSEC_PER_SEC; + + /* At the hard limit, send SIGKILL. No further action. */ + if (hard != RLIM_INFINITY && + check_rlimit(ptime, hardns, SIGKILL, false, true)) return; + + /* At the soft limit, send a SIGXCPU every second */ + if (check_rlimit(ptime, softns, SIGXCPU, false, false)) { + sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1; + softns += NSEC_PER_SEC; } - if (psecs >= soft) { - /* - * At the soft limit, send a SIGXCPU every second. - */ - if (print_fatal_signals) { - pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); - } - __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); - if (soft < hard) { - soft++; - sig->rlim[RLIMIT_CPU].rlim_cur = soft; - } - } - x = soft * NSEC_PER_SEC; - if (!prof_expires || x < prof_expires) - prof_expires = x; + + /* Update the expiry cache */ + if (softns < pct->bases[CPUCLOCK_PROF].nextevt) + pct->bases[CPUCLOCK_PROF].nextevt = softns; } - sig->cputime_expires.prof_exp = prof_expires; - sig->cputime_expires.virt_exp = virt_expires; - sig->cputime_expires.sched_exp = sched_expires; - if (task_cputime_zero(&sig->cputime_expires)) + if (expiry_cache_is_inactive(pct)) stop_process_timers(sig); - sig->cputimer.checking_timer = false; + pct->expiry_active = false; } /* @@ -1000,18 +975,21 @@ static void check_process_timers(struct task_struct *tsk, */ static void posix_cpu_timer_rearm(struct k_itimer *timer) { + clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock); + struct cpu_timer *ctmr = &timer->it.cpu; + struct task_struct *p = ctmr->task; struct sighand_struct *sighand; unsigned long flags; - struct task_struct *p = timer->it.cpu.task; u64 now; - WARN_ON_ONCE(p == NULL); + if (WARN_ON_ONCE(!p)) + return; /* * Fetch the current sample and update the timer's expiry time. */ if (CPUCLOCK_PERTHREAD(timer->it_clock)) { - cpu_clock_sample(timer->it_clock, p, &now); + now = cpu_clock_sample(clkid, p); bump_cpu_timer(timer, now); if (unlikely(p->exit_state)) return; @@ -1031,13 +1009,13 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) * The process has been reaped. * We can't even collect a sample any more. */ - timer->it.cpu.expires = 0; + cpu_timer_setexpires(ctmr, 0); return; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { /* If the process is dying, no need to rearm */ goto unlock; } - cpu_timer_sample_group(timer->it_clock, p, &now); + now = cpu_clock_sample_group(clkid, p, true); bump_cpu_timer(timer, now); /* Leave the sighand locked for the call below. */ } @@ -1051,26 +1029,24 @@ unlock: } /** - * task_cputime_expired - Compare two task_cputime entities. + * task_cputimers_expired - Check whether posix CPU timers are expired * - * @sample: The task_cputime structure to be checked for expiration. - * @expires: Expiration times, against which @sample will be checked. + * @samples: Array of current samples for the CPUCLOCK clocks + * @pct: Pointer to a posix_cputimers container * - * Checks @sample against @expires to see if any field of @sample has expired. - * Returns true if any field of the former is greater than the corresponding - * field of the latter if the latter field is set. Otherwise returns false. + * Returns true if any member of @samples is greater than the corresponding + * member of @pct->bases[CLK].nextevt. False otherwise */ -static inline int task_cputime_expired(const struct task_cputime *sample, - const struct task_cputime *expires) +static inline bool +task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct) { - if (expires->utime && sample->utime >= expires->utime) - return 1; - if (expires->stime && sample->utime + sample->stime >= expires->stime) - return 1; - if (expires->sum_exec_runtime != 0 && - sample->sum_exec_runtime >= expires->sum_exec_runtime) - return 1; - return 0; + int i; + + for (i = 0; i < CPUCLOCK_MAX; i++) { + if (sample[i] >= pct->bases[i].nextevt) + return true; + } + return false; } /** @@ -1083,48 +1059,50 @@ static inline int task_cputime_expired(const struct task_cputime *sample, * timers and compare them with the corresponding expiration times. Return * true if a timer has expired, else return false. */ -static inline int fastpath_timer_check(struct task_struct *tsk) +static inline bool fastpath_timer_check(struct task_struct *tsk) { + struct posix_cputimers *pct = &tsk->posix_cputimers; struct signal_struct *sig; - if (!task_cputime_zero(&tsk->cputime_expires)) { - struct task_cputime task_sample; + if (!expiry_cache_is_inactive(pct)) { + u64 samples[CPUCLOCK_MAX]; - task_cputime(tsk, &task_sample.utime, &task_sample.stime); - task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime; - if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) - return 1; + task_sample_cputime(tsk, samples); + if (task_cputimers_expired(samples, pct)) + return true; } sig = tsk->signal; + pct = &sig->posix_cputimers; /* - * Check if thread group timers expired when the cputimer is - * running and no other thread in the group is already checking - * for thread group cputimers. These fields are read without the - * sighand lock. However, this is fine because this is meant to - * be a fastpath heuristic to determine whether we should try to - * acquire the sighand lock to check/handle timers. + * Check if thread group timers expired when timers are active and + * no other thread in the group is already handling expiry for + * thread group cputimers. These fields are read without the + * sighand lock. However, this is fine because this is meant to be + * a fastpath heuristic to determine whether we should try to + * acquire the sighand lock to handle timer expiry. * - * In the worst case scenario, if 'running' or 'checking_timer' gets - * set but the current thread doesn't see the change yet, we'll wait - * until the next thread in the group gets a scheduler interrupt to - * handle the timer. This isn't an issue in practice because these - * types of delays with signals actually getting sent are expected. + * In the worst case scenario, if concurrently timers_active is set + * or expiry_active is cleared, but the current thread doesn't see + * the change yet, the timer checks are delayed until the next + * thread in the group gets a scheduler interrupt to handle the + * timer. This isn't an issue in practice because these types of + * delays with signals actually getting sent are expected. */ - if (READ_ONCE(sig->cputimer.running) && - !READ_ONCE(sig->cputimer.checking_timer)) { - struct task_cputime group_sample; + if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) { + u64 samples[CPUCLOCK_MAX]; - sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); + proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, + samples); - if (task_cputime_expired(&group_sample, &sig->cputime_expires)) - return 1; + if (task_cputimers_expired(samples, pct)) + return true; } if (dl_task(tsk) && tsk->dl.dl_overrun) - return 1; + return true; - return 0; + return false; } /* @@ -1132,11 +1110,12 @@ static inline int fastpath_timer_check(struct task_struct *tsk) * already updated our counts. We need to check if any timers fire now. * Interrupts are disabled. */ -void run_posix_cpu_timers(struct task_struct *tsk) +void run_posix_cpu_timers(void) { - LIST_HEAD(firing); + struct task_struct *tsk = current; struct k_itimer *timer, *next; unsigned long flags; + LIST_HEAD(firing); lockdep_assert_irqs_disabled(); @@ -1174,11 +1153,11 @@ void run_posix_cpu_timers(struct task_struct *tsk) * each timer's lock before clearing its firing flag, so no * timer call will interfere. */ - list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) { + list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) { int cpu_firing; spin_lock(&timer->it_lock); - list_del_init(&timer->it.cpu.entry); + list_del_init(&timer->it.cpu.elist); cpu_firing = timer->it.cpu.firing; timer->it.cpu.firing = 0; /* @@ -1196,16 +1175,18 @@ void run_posix_cpu_timers(struct task_struct *tsk) * Set one of the process-wide special case CPU timers or RLIMIT_CPU. * The tsk->sighand->siglock must be held by the caller. */ -void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, +void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid, u64 *newval, u64 *oldval) { - u64 now; - int ret; + u64 now, *nextevt; + + if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED)) + return; - WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); - ret = cpu_timer_sample_group(clock_idx, tsk, &now); + nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt; + now = cpu_clock_sample_group(clkid, tsk, true); - if (oldval && ret != -EINVAL) { + if (oldval) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update @@ -1226,19 +1207,11 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } /* - * Update expiration cache if we are the earliest timer, or eventually - * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire. + * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF + * expiry cache is also used by RLIMIT_CPU!. */ - switch (clock_idx) { - case CPUCLOCK_PROF: - if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval)) - tsk->signal->cputime_expires.prof_exp = *newval; - break; - case CPUCLOCK_VIRT: - if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval)) - tsk->signal->cputime_expires.virt_exp = *newval; - break; - } + if (*newval < *nextevt) + *nextevt = *newval; tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER); } @@ -1260,6 +1233,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, timer.it_overrun = -1; error = posix_cpu_timer_create(&timer); timer.it_process = current; + if (!error) { static struct itimerspec64 zero_it; struct restart_block *restart; @@ -1275,7 +1249,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, } while (!signal_pending(current)) { - if (timer.it.cpu.expires == 0) { + if (!cpu_timer_getexpires(&timer.it.cpu)) { /* * Our timer fired and was reset, below * deletion can not fail. @@ -1297,7 +1271,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, /* * We were interrupted by a signal. */ - expires = timer.it.cpu.expires; + expires = cpu_timer_getexpires(&timer.it.cpu); error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); if (!error) { /* |