From 1c3eda01a79b8e9237d91c52c5a75b20983f47c6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:07 +0200 Subject: vtime, sched/cputime: Remove vtime_account_user() It's an unnecessary function between vtime_user_exit() and account_user_time(). Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/vtime.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 0681fe25abeb..18b405e3cd93 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -67,19 +67,12 @@ static inline void vtime_account_system(struct task_struct *tsk) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_account_user(struct task_struct *tsk); extern void vtime_user_enter(struct task_struct *tsk); - -static inline void vtime_user_exit(struct task_struct *tsk) -{ - vtime_account_user(tsk); -} - +extern void vtime_user_exit(struct task_struct *tsk); extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } static inline void vtime_guest_enter(struct task_struct *tsk) { } -- cgit v1.2.3 From 60a9ce57e7c5ac1df3a39fb941022bbfa40c0862 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:09 +0200 Subject: sched/cputime: Rename vtime fields The current "snapshot" based naming on vtime 
fields suggests we record some past event but that's a low level picture of their actual purpose which comes out blurry. The real point of these fields is to run a basic state machine that tracks down cputime entry while switching between contexts. So lets reflect that with more meaningful names. Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 4 ++-- kernel/fork.c | 4 ++-- kernel/sched/cputime.c | 30 +++++++++++++++--------------- 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e049526bc188..3d537331cd4e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -171,8 +171,8 @@ extern struct cred init_cred; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ - .vtime_snap = 0, \ - .vtime_snap_whence = VTIME_SYS, + .vtime_starttime = 0, \ + .vtime_state = VTIME_SYS, #else # define INIT_VTIME(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c4ca7433d9d..ff001646549e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -689,7 +689,7 @@ struct task_struct { struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + unsigned long long vtime_starttime; enum { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, @@ -697,7 +697,7 @@ struct task_struct { VTIME_USER, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, - } vtime_snap_whence; + } vtime_state; #endif #ifdef CONFIG_NO_HZ_FULL diff --git a/kernel/fork.c b/kernel/fork.c index 
e53770d2bf95..83c4f9bf3e14 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1638,8 +1638,8 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_init(&p->vtime_seqcount); - p->vtime_snap = 0; - p->vtime_snap_whence = VTIME_INACTIVE; + p->vtime_starttime = 0; + p->vtime_state = VTIME_INACTIVE; #endif #if defined(SPLIT_RSS_COUNTING) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ab68927e8e94..8c64753067c5 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -683,10 +683,10 @@ static u64 vtime_delta(struct task_struct *tsk) { unsigned long now = READ_ONCE(jiffies); - if (time_before(now, (unsigned long)tsk->vtime_snap)) + if (time_before(now, (unsigned long)tsk->vtime_starttime)) return 0; - return jiffies_to_nsecs(now - tsk->vtime_snap); + return jiffies_to_nsecs(now - tsk->vtime_starttime); } static u64 get_vtime_delta(struct task_struct *tsk) @@ -701,10 +701,10 @@ static u64 get_vtime_delta(struct task_struct *tsk) * elapsed time. Limit account_other_time to prevent rounding * errors from causing elapsed vtime to go negative. */ - delta = jiffies_to_nsecs(now - tsk->vtime_snap); + delta = jiffies_to_nsecs(now - tsk->vtime_starttime); other = account_other_time(delta); - WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); - tsk->vtime_snap = now; + WARN_ON_ONCE(tsk->vtime_state == VTIME_INACTIVE); + tsk->vtime_starttime = now; return delta - other; } @@ -746,7 +746,7 @@ void vtime_guest_enter(struct task_struct *tsk) { /* * The flags must be updated under the lock with - * the vtime_snap flush and update. + * the vtime_starttime flush and update. * That enforces a right ordering and update sequence * synchronization against the reader (task_gtime()) * that can thus safely catch up with a tickless delta. 
@@ -776,12 +776,12 @@ void vtime_account_idle(struct task_struct *tsk) void arch_vtime_task_switch(struct task_struct *prev) { write_seqcount_begin(&prev->vtime_seqcount); - prev->vtime_snap_whence = VTIME_INACTIVE; + prev->vtime_state = VTIME_INACTIVE; write_seqcount_end(&prev->vtime_seqcount); write_seqcount_begin(&current->vtime_seqcount); - current->vtime_snap_whence = VTIME_SYS; - current->vtime_snap = jiffies; + current->vtime_state = VTIME_SYS; + current->vtime_starttime = jiffies; write_seqcount_end(&current->vtime_seqcount); } @@ -791,8 +791,8 @@ void vtime_init_idle(struct task_struct *t, int cpu) local_irq_save(flags); write_seqcount_begin(&t->vtime_seqcount); - t->vtime_snap_whence = VTIME_SYS; - t->vtime_snap = jiffies; + t->vtime_state = VTIME_SYS; + t->vtime_starttime = jiffies; write_seqcount_end(&t->vtime_seqcount); local_irq_restore(flags); } @@ -809,7 +809,7 @@ u64 task_gtime(struct task_struct *t) seq = read_seqcount_begin(&t->vtime_seqcount); gtime = t->gtime; - if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU) + if (t->vtime_state == VTIME_SYS && t->flags & PF_VCPU) gtime += vtime_delta(t); } while (read_seqcount_retry(&t->vtime_seqcount, seq)); @@ -840,7 +840,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) *stime = t->stime; /* Task is sleeping, nothing to add */ - if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t)) + if (t->vtime_state == VTIME_INACTIVE || is_idle_task(t)) continue; delta = vtime_delta(t); @@ -849,9 +849,9 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) * Task runs either in user or kernel space, add pending nohz time to * the right place.
*/ - if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) + if (t->vtime_state == VTIME_USER || t->flags & PF_VCPU) *utime += delta; - else if (t->vtime_snap_whence == VTIME_SYS) + else if (t->vtime_state == VTIME_SYS) *stime += delta; } while (read_seqcount_retry(&t->vtime_seqcount, seq)); } -- cgit v1.2.3 From bac5b6b6b11560f323e71d0ebac4061cfe5f56c0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Jun 2017 19:15:10 +0200 Subject: sched/cputime: Move the vtime task fields to their own struct We are about to add vtime accumulation fields to the task struct. Let's avoid more bloatification and gather vtime information to their own struct. Tested-by: Luiz Capitulino Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 6 +-- include/linux/sched.h | 26 ++++++----- kernel/fork.c | 6 +-- kernel/sched/cputime.c | 112 ++++++++++++++++++++++++++-------------------- 4 files changed, 86 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3d537331cd4e..a2f6707e9fc0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,9 +170,9 @@ extern struct cred init_cred; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ - .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ - .vtime_starttime = 0, \ - .vtime_state = VTIME_SYS, + .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ + .vtime.starttime = 0, \ + .vtime.state = VTIME_SYS, #else # define INIT_VTIME(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index ff001646549e..eeff8a024f0c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,6 +223,21 @@ struct task_cputime { #define prof_exp stime #define sched_exp 
sum_exec_runtime +enum vtime_state { + /* Task is sleeping or running in a CPU with VTIME inactive: */ + VTIME_INACTIVE = 0, + /* Task runs in userspace in a CPU with VTIME active: */ + VTIME_USER, + /* Task runs in kernelspace in a CPU with VTIME active: */ + VTIME_SYS, +}; + +struct vtime { + seqcount_t seqcount; + unsigned long long starttime; + enum vtime_state state; +}; + struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ @@ -688,16 +703,7 @@ struct task_struct { u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_starttime; - enum { - /* Task is sleeping or running in a CPU with VTIME inactive: */ - VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active: */ - VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active: */ - VTIME_SYS, - } vtime_state; + struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL diff --git a/kernel/fork.c b/kernel/fork.c index 83c4f9bf3e14..d927ec11aa7a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1637,9 +1637,9 @@ static __latent_entropy struct task_struct *copy_process( prev_cputime_init(&p->prev_cputime); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_init(&p->vtime_seqcount); - p->vtime_starttime = 0; - p->vtime_state = VTIME_INACTIVE; + seqcount_init(&p->vtime.seqcount); + p->vtime.starttime = 0; + p->vtime.state = VTIME_INACTIVE; #endif #if defined(SPLIT_RSS_COUNTING) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8c64753067c5..9ee725edcbe0 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -679,17 +679,17 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static u64 vtime_delta(struct task_struct *tsk) +static u64 vtime_delta(struct vtime *vtime) { unsigned long now = READ_ONCE(jiffies); - if (time_before(now, (unsigned 
long)tsk->vtime_starttime)) + if (time_before(now, (unsigned long)vtime->starttime)) return 0; - return jiffies_to_nsecs(now - tsk->vtime_starttime); + return jiffies_to_nsecs(now - vtime->starttime); } -static u64 get_vtime_delta(struct task_struct *tsk) +static u64 get_vtime_delta(struct vtime *vtime) { unsigned long now = READ_ONCE(jiffies); u64 delta, other; @@ -701,49 +701,56 @@ static u64 get_vtime_delta(struct task_struct *tsk) * elapsed time. Limit account_other_time to prevent rounding * errors from causing elapsed vtime to go negative. */ - delta = jiffies_to_nsecs(now - tsk->vtime_starttime); + delta = jiffies_to_nsecs(now - vtime->starttime); other = account_other_time(delta); - WARN_ON_ONCE(tsk->vtime_state == VTIME_INACTIVE); - tsk->vtime_starttime = now; + WARN_ON_ONCE(vtime->state == VTIME_INACTIVE); + vtime->starttime = now; return delta - other; } static void __vtime_account_system(struct task_struct *tsk) { - account_system_time(tsk, irq_count(), get_vtime_delta(tsk)); + account_system_time(tsk, irq_count(), get_vtime_delta(&tsk->vtime)); } void vtime_account_system(struct task_struct *tsk) { - if (!vtime_delta(tsk)) + struct vtime *vtime = &tsk->vtime; + + if (!vtime_delta(vtime)) return; - write_seqcount_begin(&tsk->vtime_seqcount); + write_seqcount_begin(&vtime->seqcount); __vtime_account_system(tsk); - write_seqcount_end(&tsk->vtime_seqcount); + write_seqcount_end(&vtime->seqcount); } void vtime_user_enter(struct task_struct *tsk) { - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) + struct vtime *vtime = &tsk->vtime; + + write_seqcount_begin(&vtime->seqcount); + if (vtime_delta(vtime)) __vtime_account_system(tsk); - tsk->vtime_snap_whence = VTIME_USER; - write_seqcount_end(&tsk->vtime_seqcount); + vtime->state = VTIME_USER; + write_seqcount_end(&vtime->seqcount); } void vtime_user_exit(struct task_struct *tsk) { - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) - account_user_time(tsk, 
get_vtime_delta(tsk)); - tsk->vtime_snap_whence = VTIME_SYS; - write_seqcount_end(&tsk->vtime_seqcount); + struct vtime *vtime = &tsk->vtime; + + write_seqcount_begin(&vtime->seqcount); + if (vtime_delta(vtime)) + account_user_time(tsk, get_vtime_delta(vtime)); + vtime->state = VTIME_SYS; + write_seqcount_end(&vtime->seqcount); } void vtime_guest_enter(struct task_struct *tsk) { + struct vtime *vtime = &tsk->vtime; /* * The flags must be updated under the lock with * the vtime_starttime flush and update. @@ -751,54 +758,62 @@ void vtime_guest_enter(struct task_struct *tsk) * synchronization against the reader (task_gtime()) * that can thus safely catch up with a tickless delta. */ - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) + write_seqcount_begin(&vtime->seqcount); + if (vtime_delta(vtime)) __vtime_account_system(tsk); current->flags |= PF_VCPU; - write_seqcount_end(&tsk->vtime_seqcount); + write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_enter); void vtime_guest_exit(struct task_struct *tsk) { - write_seqcount_begin(&tsk->vtime_seqcount); + struct vtime *vtime = &tsk->vtime; + + write_seqcount_begin(&vtime->seqcount); __vtime_account_system(tsk); current->flags &= ~PF_VCPU; - write_seqcount_end(&tsk->vtime_seqcount); + write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_exit); void vtime_account_idle(struct task_struct *tsk) { - account_idle_time(get_vtime_delta(tsk)); + account_idle_time(get_vtime_delta(&tsk->vtime)); } void arch_vtime_task_switch(struct task_struct *prev) { - write_seqcount_begin(&prev->vtime_seqcount); - prev->vtime_state = VTIME_INACTIVE; - write_seqcount_end(&prev->vtime_seqcount); + struct vtime *vtime = &prev->vtime; - write_seqcount_begin(&current->vtime_seqcount); - current->vtime_state = VTIME_SYS; - current->vtime_starttime = jiffies; - write_seqcount_end(&current->vtime_seqcount); + write_seqcount_begin(&vtime->seqcount); + vtime->state = VTIME_INACTIVE; +
write_seqcount_end(&vtime->seqcount); + + vtime = &current->vtime; + + write_seqcount_begin(&vtime->seqcount); + vtime->state = VTIME_SYS; + vtime->starttime = jiffies; + write_seqcount_end(&vtime->seqcount); } void vtime_init_idle(struct task_struct *t, int cpu) { + struct vtime *vtime = &t->vtime; unsigned long flags; local_irq_save(flags); - write_seqcount_begin(&t->vtime_seqcount); - t->vtime_state = VTIME_SYS; - t->vtime_starttime = jiffies; - write_seqcount_end(&t->vtime_seqcount); + write_seqcount_begin(&vtime->seqcount); + vtime->state = VTIME_SYS; + vtime->starttime = jiffies; + write_seqcount_end(&vtime->seqcount); local_irq_restore(flags); } u64 task_gtime(struct task_struct *t) { + struct vtime *vtime = &t->vtime; unsigned int seq; u64 gtime; @@ -806,13 +821,13 @@ u64 task_gtime(struct task_struct *t) return t->gtime; do { - seq = read_seqcount_begin(&t->vtime_seqcount); + seq = read_seqcount_begin(&vtime->seqcount); gtime = t->gtime; - if (t->vtime_state == VTIME_SYS && t->flags & PF_VCPU) - gtime += vtime_delta(t); + if (vtime->state == VTIME_SYS && t->flags & PF_VCPU) + gtime += vtime_delta(vtime); - } while (read_seqcount_retry(&t->vtime_seqcount, seq)); + } while (read_seqcount_retry(&vtime->seqcount, seq)); return gtime; } @@ -824,8 +839,9 @@ u64 task_gtime(struct task_struct *t) */ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { - u64 delta; + struct vtime *vtime = &t->vtime; unsigned int seq; + u64 delta; if (!vtime_accounting_enabled()) { *utime = t->utime; @@ -834,25 +850,25 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) } do { - seq = read_seqcount_begin(&t->vtime_seqcount); + seq = read_seqcount_begin(&vtime->seqcount); *utime = t->utime; *stime = t->stime; /* Task is sleeping, nothing to add */ - if (t->vtime_state == VTIME_INACTIVE || is_idle_task(t)) + if (vtime->state == VTIME_INACTIVE || is_idle_task(t)) continue; - delta = vtime_delta(t); + delta = vtime_delta(vtime); /* * Task runs either in user or
kernel space, add pending nohz time to * the right place. */ - if (t->vtime_state == VTIME_USER || t->flags & PF_VCPU) + if (vtime->state == VTIME_USER || t->flags & PF_VCPU) *utime += delta; - else if (t->vtime_state == VTIME_SYS) + else if (vtime->state == VTIME_SYS) *stime += delta; - } while (read_seqcount_retry(&t->vtime_seqcount, seq)); + } while (read_seqcount_retry(&vtime->seqcount, seq)); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v1.2.3 From 2a42eb9594a1480b4ead9e036e06ee1290e5fa6d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 29 Jun 2017 19:15:11 +0200 Subject: sched/cputime: Accumulate vtime on top of nsec clocksource Currently the cputime source used by vtime is jiffies. When we cross a context boundary and jiffies have changed since the last snapshot, the pending cputime is accounted to the switching out context. This system works ok if the ticks are not aligned across CPUs. If they instead are aligned (ie: all fire at the same time) and the CPUs run in userspace, the jiffies change is only observed on tick exit and therefore the user cputime is accounted as system cputime. This is because the CPU that maintains timekeeping fires its tick at the same time as the others. It updates jiffies in the middle of the tick and the other CPUs see that update on IRQ exit: CPU 0 (timekeeper) CPU 1 ------------------- ------------- jiffies = N ... run in userspace for a jiffy tick entry tick entry (sees jiffies = N) set jiffies = N + 1 tick exit tick exit (sees jiffies = N + 1) account 1 jiffy as stime Fix this by using a nanosecond clock source instead of jiffies. The cputime is then accumulated and flushed every time the pending delta reaches a jiffy in order to mitigate the accounting overhead. [ fweisbec: changelog, rebase on struct vtime, field renames, add delta on cputime readers, keep idle vtime as-is (low overhead accounting), harmonize clock sources.
] Suggested-by: Thomas Gleixner Reported-by: Luiz Capitulino Tested-by: Luiz Capitulino Signed-off-by: Wanpeng Li Signed-off-by: Frederic Weisbecker Reviewed-by: Thomas Gleixner Acked-by: Rik van Riel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1498756511-11714-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ kernel/sched/cputime.c | 64 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eeff8a024f0c..4818126c5153 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -236,6 +236,9 @@ struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; + u64 utime; + u64 stime; + u64 gtime; }; struct sched_info { diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9ee725edcbe0..6e3ea4ac1bda 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -681,18 +681,19 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static u64 vtime_delta(struct vtime *vtime) { - unsigned long now = READ_ONCE(jiffies); + unsigned long long clock; - if (time_before(now, (unsigned long)vtime->starttime)) + clock = sched_clock_cpu(smp_processor_id()); + if (clock < vtime->starttime) return 0; - return jiffies_to_nsecs(now - vtime->starttime); + return clock - vtime->starttime; } static u64 get_vtime_delta(struct vtime *vtime) { - unsigned long now = READ_ONCE(jiffies); - u64 delta, other; + u64 delta = vtime_delta(vtime); + u64 other; /* * Unlike tick based timing, vtime based timing never has lost @@ -701,17 +702,31 @@ static u64 get_vtime_delta(struct vtime *vtime) * elapsed time. Limit account_other_time to prevent rounding * errors from causing elapsed vtime to go negative. 
*/ - delta = jiffies_to_nsecs(now - vtime->starttime); other = account_other_time(delta); WARN_ON_ONCE(vtime->state == VTIME_INACTIVE); - vtime->starttime = now; + vtime->starttime += delta; return delta - other; } -static void __vtime_account_system(struct task_struct *tsk) +static void __vtime_account_system(struct task_struct *tsk, + struct vtime *vtime) { - account_system_time(tsk, irq_count(), get_vtime_delta(&tsk->vtime)); + vtime->stime += get_vtime_delta(vtime); + if (vtime->stime >= TICK_NSEC) { + account_system_time(tsk, irq_count(), vtime->stime); + vtime->stime = 0; + } +} + +static void vtime_account_guest(struct task_struct *tsk, + struct vtime *vtime) +{ + vtime->gtime += get_vtime_delta(vtime); + if (vtime->gtime >= TICK_NSEC) { + account_guest_time(tsk, vtime->gtime); + vtime->gtime = 0; + } } void vtime_account_system(struct task_struct *tsk) @@ -722,7 +737,11 @@ void vtime_account_system(struct task_struct *tsk) return; write_seqcount_begin(&vtime->seqcount); - __vtime_account_system(tsk); + /* We might have scheduled out from guest path */ + if (current->flags & PF_VCPU) + vtime_account_guest(tsk, vtime); + else + __vtime_account_system(tsk, vtime); write_seqcount_end(&vtime->seqcount); } @@ -731,8 +750,7 @@ void vtime_user_enter(struct task_struct *tsk) struct vtime *vtime = &tsk->vtime; write_seqcount_begin(&vtime->seqcount); - if (vtime_delta(vtime)) - __vtime_account_system(tsk); + __vtime_account_system(tsk, vtime); vtime->state = VTIME_USER; write_seqcount_end(&vtime->seqcount); } @@ -742,8 +760,11 @@ void vtime_user_exit(struct task_struct *tsk) struct vtime *vtime = &tsk->vtime; write_seqcount_begin(&vtime->seqcount); - if (vtime_delta(vtime)) - account_user_time(tsk, get_vtime_delta(vtime)); + vtime->utime += get_vtime_delta(vtime); + if (vtime->utime >= TICK_NSEC) { + account_user_time(tsk, vtime->utime); + vtime->utime = 0; + } vtime->state = VTIME_SYS; write_seqcount_end(&vtime->seqcount); } @@ -759,8 +780,7 @@ void 
vtime_guest_enter(struct task_struct *tsk) * that can thus safely catch up with a tickless delta. */ write_seqcount_begin(&vtime->seqcount); - if (vtime_delta(vtime)) - __vtime_account_system(tsk); + __vtime_account_system(tsk, vtime); current->flags |= PF_VCPU; write_seqcount_end(&vtime->seqcount); } @@ -771,7 +791,7 @@ void vtime_guest_exit(struct task_struct *tsk) struct vtime *vtime = &tsk->vtime; write_seqcount_begin(&vtime->seqcount); - __vtime_account_system(tsk); + vtime_account_guest(tsk, vtime); current->flags &= ~PF_VCPU; write_seqcount_end(&vtime->seqcount); } @@ -794,7 +814,7 @@ void arch_vtime_task_switch(struct task_struct *prev) write_seqcount_begin(&vtime->seqcount); vtime->state = VTIME_SYS; - vtime->starttime = jiffies; + vtime->starttime = sched_clock_cpu(smp_processor_id()); write_seqcount_end(&vtime->seqcount); } @@ -806,7 +826,7 @@ void vtime_init_idle(struct task_struct *t, int cpu) local_irq_save(flags); write_seqcount_begin(&vtime->seqcount); vtime->state = VTIME_SYS; - vtime->starttime = jiffies; + vtime->starttime = sched_clock_cpu(cpu); write_seqcount_end(&vtime->seqcount); local_irq_restore(flags); } @@ -825,7 +845,7 @@ u64 task_gtime(struct task_struct *t) gtime = t->gtime; if (vtime->state == VTIME_SYS && t->flags & PF_VCPU) - gtime += vtime_delta(vtime); + gtime += vtime->gtime + vtime_delta(vtime); } while (read_seqcount_retry(&vtime->seqcount, seq)); @@ -866,9 +886,9 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) * the right place. */ if (vtime->state == VTIME_USER || t->flags & PF_VCPU) - *utime += delta; + *utime += vtime->utime + delta; else if (vtime->state == VTIME_SYS) - *stime += delta; + *stime += vtime->stime + delta; } while (read_seqcount_retry(&vtime->seqcount, seq)); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ -- cgit v1.2.3