summaryrefslogtreecommitdiff
path: root/kernel/sched
diff options
context:
space:
mode:
authorFrederic Weisbecker <frederic@kernel.org>2019-10-25 05:03:03 +0300
committerIngo Molnar <mingo@kernel.org>2019-10-29 12:01:17 +0300
commit64eea63c19a2c386a96638f4e54a1355510709e3 (patch)
treee0469223fe098bb8b0a03ce06ba586a0bd9f9567 /kernel/sched
parent023e9deb51c9e1aafacbd421e55beadcb8e87f53 (diff)
downloadlinux-64eea63c19a2c386a96638f4e54a1355510709e3.tar.xz
sched/kcpustat: Introduce vtime-aware kcpustat accessor for CPUTIME_SYSTEM
Kcpustat is not correctly supported on nohz_full CPUs. The tick doesn't fire and the cputime therefore doesn't move forward. The issue has shown up after the vanishing of the remaining 1Hz which has made the stall visible. We are solving that with checking the task running on a CPU through RCU and reading its vtime delta that we add to the raw kcpustat values. We make sure that we fetch a coherent raw-kcpustat/vtime-delta couple sequence while checking that the CPU referred by the target vtime is the correct one, under the locked vtime seqcount. Only CPUTIME_SYSTEM is handled here as a start because it's the trivial case. User and guest time will require more preparation work to correctly handle niceness. Reported-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rik van Riel <riel@surriel.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wanpeng Li <wanpengli@tencent.com> Link: https://lkml.kernel.org/r/20191025020303.19342-1-frederic@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/cputime.c82
1 files changed, 82 insertions, 0 deletions
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b931a19df093..e0cd20693ef5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -911,4 +911,86 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
*utime += vtime->utime + delta;
} while (read_seqcount_retry(&vtime->seqcount, seq));
}
+
+static int kcpustat_field_vtime(u64 *cpustat,
+ struct vtime *vtime,
+ enum cpu_usage_stat usage,
+ int cpu, u64 *val)
+{
+ unsigned int seq;
+ int err;
+
+ do {
+ seq = read_seqcount_begin(&vtime->seqcount);
+
+ /*
+ * We raced against context switch, fetch the
+ * kcpustat task again.
+ */
+ if (vtime->cpu != cpu && vtime->cpu != -1)
+ return -EAGAIN;
+
+ /*
+ * Two possible things here:
+ * 1) We are seeing the scheduling out task (prev) or any past one.
+ * 2) We are seeing the scheduling in task (next) but it hasn't
+ * passed though vtime_task_switch() yet so the pending
+ * cputime of the prev task may not be flushed yet.
+ *
+ * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+ */
+ if (vtime->state == VTIME_INACTIVE)
+ return -EAGAIN;
+
+ err = 0;
+
+ *val = cpustat[usage];
+
+ if (vtime->state == VTIME_SYS)
+ *val += vtime->stime + vtime_delta(vtime);
+
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
+
+ return 0;
+}
+
+u64 kcpustat_field(struct kernel_cpustat *kcpustat,
+ enum cpu_usage_stat usage, int cpu)
+{
+ u64 *cpustat = kcpustat->cpustat;
+ struct rq *rq;
+ u64 val;
+ int err;
+
+ if (!vtime_accounting_enabled_cpu(cpu))
+ return cpustat[usage];
+
+ /* Only support sys vtime for now */
+ if (usage != CPUTIME_SYSTEM)
+ return cpustat[usage];
+
+ rq = cpu_rq(cpu);
+
+ for (;;) {
+ struct task_struct *curr;
+ struct vtime *vtime;
+
+ rcu_read_lock();
+ curr = rcu_dereference(rq->curr);
+ if (WARN_ON_ONCE(!curr)) {
+ rcu_read_unlock();
+ return cpustat[usage];
+ }
+
+ vtime = &curr->vtime;
+ err = kcpustat_field_vtime(cpustat, vtime, usage, cpu, &val);
+ rcu_read_unlock();
+
+ if (!err)
+ return val;
+
+ cpu_relax();
+ }
+}
+EXPORT_SYMBOL_GPL(kcpustat_field);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */