From ca74e92b4698276b6696f15a801759f50944f387 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:36 -0700 Subject: [PATCH] per-task-delay-accounting: setup Initialization code related to collection of per-task "delay" statistics which measure how long it had to wait for cpu, sync block io, swapping etc. The collection of statistics and the interface are in other patches. This patch sets up the data structures and allows the statistics collection to be disabled through a kernel boot parameter. Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/linux/delayacct.h (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h new file mode 100644 index 000000000000..9572cfa1f129 --- /dev/null +++ b/include/linux/delayacct.h @@ -0,0 +1,69 @@ +/* delayacct.h - per-task delay accounting + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + */ + +#ifndef _LINUX_DELAYACCT_H +#define _LINUX_DELAYACCT_H + +#include <linux/sched.h> + +#ifdef CONFIG_TASK_DELAY_ACCT + +extern int delayacct_on; /* Delay accounting turned on/off */ +extern kmem_cache_t *delayacct_cache; +extern void delayacct_init(void); +extern void __delayacct_tsk_init(struct task_struct *); +extern void __delayacct_tsk_exit(struct task_struct *); + +static inline void delayacct_set_flag(int flag) +{ + if (current->delays) + current->delays->flags |= flag; +} + +static inline void delayacct_clear_flag(int flag) +{ + if (current->delays) + current->delays->flags &= ~flag; +} + +static inline void delayacct_tsk_init(struct task_struct *tsk) +{ + /* reinitialize in case parent's non-null pointer was dup'ed*/ + tsk->delays = NULL; + if (unlikely(delayacct_on)) + __delayacct_tsk_init(tsk); +} + +static inline void delayacct_tsk_exit(struct task_struct *tsk) +{ + if (tsk->delays) + __delayacct_tsk_exit(tsk); +} + +#else +static inline void delayacct_set_flag(int flag) +{} +static inline void delayacct_clear_flag(int flag) +{} +static inline void delayacct_init(void) +{} +static inline void delayacct_tsk_init(struct task_struct *tsk) +{} +static inline void delayacct_tsk_exit(struct task_struct *tsk) +{} +#endif /* CONFIG_TASK_DELAY_ACCT */ + +#endif -- cgit v1.2.3 From 0ff922452df86f3e9a2c6f705c4588ec62d096a7 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:37 -0700 Subject: [PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection Unlike earlier iterations of the delay accounting patches, now delays are only collected for the actual I/O waits rather than try and cover the delays seen in I/O submission paths. Account separately for block I/O delays incurred as a result of swapin page faults whose frequency can be affected by the task/process' rss limit. Hence swapin delays can act as feedback for rss limit changes independent of I/O priority changes.
Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 25 +++++++++++++++++++++++++ include/linux/sched.h | 13 +++++++++++++ kernel/delayacct.c | 19 +++++++++++++++++++ kernel/sched.c | 5 +++++ mm/memory.c | 4 ++++ 5 files changed, 66 insertions(+) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 9572cfa1f129..0ecbf9aad8e1 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -19,6 +19,13 @@ #include +/* + * Per-task flags relevant to delay accounting + * maintained privately to avoid exhausting similar flags in sched.h:PF_* + * Used to set current->delays->flags + */ +#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ + #ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ @@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); +extern void __delayacct_blkio_start(void); +extern void __delayacct_blkio_end(void); static inline void delayacct_set_flag(int flag) { @@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk) __delayacct_tsk_exit(tsk); } +static inline void delayacct_blkio_start(void) +{ + if (current->delays) + __delayacct_blkio_start(); +} + +static inline void delayacct_blkio_end(void) +{ + if (current->delays) + __delayacct_blkio_end(); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk) {} static inline void delayacct_tsk_exit(struct task_struct *tsk) {} +static inline void delayacct_blkio_start(void) +{} +static inline void delayacct_blkio_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT 
*/ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a54e62763c5..2f43f1fb7de7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -566,6 +566,19 @@ struct task_delay_info { * Atomicity of updates to XXX_delay, XXX_count protected by * single lock above (split into XXX_lock if contention is an issue). */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ }; #endif diff --git a/kernel/delayacct.c b/kernel/delayacct.c index fbf7f2284952..3546b0800f9f 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end, spin_unlock(&current->delays->lock); } +void __delayacct_blkio_start(void) +{ + delayacct_start(&current->delays->blkio_start); +} + +void __delayacct_blkio_end(void) +{ + if (current->delays->flags & DELAYACCT_PF_SWAPIN) + /* Swapin block I/O */ + delayacct_end(&current->delays->blkio_start, + &current->delays->blkio_end, + &current->delays->swapin_delay, + &current->delays->swapin_count); + else /* Other block I/O */ + delayacct_end(&current->delays->blkio_start, + &current->delays->blkio_end, + &current->delays->blkio_delay, + &current->delays->blkio_count); +} diff --git a/kernel/sched.c b/kernel/sched.c index e9a0b61f12ab..9d42cbfc4f8b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -51,6 +51,7 @@ #include #include #include +#include <linux/delayacct.h> #include #include @@ -4534,9 +4535,11 @@ void __sched io_schedule(void) { struct rq *rq = &__raw_get_cpu_var(runqueues); +
delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); schedule(); atomic_dec(&rq->nr_iowait); + delayacct_blkio_end(); } EXPORT_SYMBOL(io_schedule); @@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout) struct rq *rq = &__raw_get_cpu_var(runqueues); long ret; + delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); ret = schedule_timeout(timeout); atomic_dec(&rq->nr_iowait); + delayacct_blkio_end(); return ret; } diff --git a/mm/memory.c b/mm/memory.c index de8bc85dc8f3..109e9866237e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, migration_entry_wait(mm, pmd, address); goto out; } + delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry, address, vma); @@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (likely(pte_same(*page_table, orig_pte))) ret = VM_FAULT_OOM; + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; } @@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, grab_swap_token(); } + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); mark_page_accessed(page); lock_page(page); -- cgit v1.2.3 From 6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:41 -0700 Subject: [PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface Usage of taskstats interface by delay accounting. 
Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 15 ++++++++++ include/linux/sched.h | 1 + include/linux/taskstats.h | 55 ++++++++++++++++++++++++++++++++++++- include/linux/taskstats_kern.h | 1 + init/Kconfig | 1 + kernel/delayacct.c | 62 +++++++++++++++++++++++++++++++++++++++++- kernel/taskstats.c | 16 +++++++---- 7 files changed, 144 insertions(+), 7 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 0ecbf9aad8e1..d955078a1441 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,6 +18,7 @@ #define _LINUX_DELAYACCT_H #include +#include /* * Per-task flags relevant to delay accounting @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); +extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); static inline void delayacct_set_flag(int flag) { @@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void) __delayacct_blkio_end(); } +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ + if (likely(!delayacct_on)) + return -EINVAL; + if (!tsk->delays) + return 0; + return __delayacct_add_tsk(d, tsk); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void) {} static inline void delayacct_blkio_end(void) {} +static inline int delayacct_add_tsk(struct taskstats *d, + struct task_struct *tsk) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index f751062d89a2..3c5610ca0c92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -990,6 
+990,7 @@ struct task_struct { */ struct pipe_inode_info *splice_pipe; #ifdef CONFIG_TASK_DELAY_ACCT + spinlock_t delays_lock; struct task_delay_info *delays; #endif }; diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 51f62759bea9..c6aeca32348e 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -34,7 +34,60 @@ struct taskstats { /* Version 1 */ - __u64 version; + __u16 version; + __u16 padding[3]; /* Userspace should not interpret the padding + * field which can be replaced by useful + * fields if struct taskstats is extended. + */ + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. 
+ * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ }; diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index bd0ecb969c26..fc9da2e26443 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -17,6 +17,7 @@ enum { #ifdef CONFIG_TASKSTATS extern kmem_cache_t *taskstats_cache; +extern struct mutex taskstats_exit_mutex; static inline void taskstats_exit_alloc(struct taskstats **ptidstats, struct taskstats **ptgidstats) diff --git a/init/Kconfig b/init/Kconfig index 56a7093b4e4c..a099fc6526d9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -173,6 +173,7 @@ config TASKSTATS config TASK_DELAY_ACCT bool "Enable per-task delay accounting (EXPERIMENTAL)" + depends on TASKSTATS help Collect information on time spent by a task waiting for system resources like cpu, synchronous block I/O completion and swapping diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3546b0800f9f..1be274a462ca 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -41,6 +41,10 @@ void delayacct_init(void) void __delayacct_tsk_init(struct task_struct *tsk) { + spin_lock_init(&tsk->delays_lock); + /* No need to acquire tsk->delays_lock for allocation here unless + __delayacct_tsk_init called after tsk is attached to tasklist + */ tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); if (tsk->delays) spin_lock_init(&tsk->delays->lock); @@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk) void __delayacct_tsk_exit(struct task_struct *tsk) { - kmem_cache_free(delayacct_cache, tsk->delays); + struct task_delay_info *delays = tsk->delays; + spin_lock(&tsk->delays_lock); tsk->delays = NULL; + spin_unlock(&tsk->delays_lock); + kmem_cache_free(delayacct_cache, delays); } /* @@ -104,3 +111,56 @@ void __delayacct_blkio_end(void) 
&current->delays->blkio_delay, &current->delays->blkio_count); } + +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +{ + s64 tmp; + struct timespec ts; + unsigned long t1,t2,t3; + + spin_lock(&tsk->delays_lock); + + /* Though tsk->delays accessed later, early exit avoids + * unnecessary returning of other data + */ + if (!tsk->delays) + goto done; + + tmp = (s64)d->cpu_run_real_total; + cputime_to_timespec(tsk->utime + tsk->stime, &ts); + tmp += timespec_to_ns(&ts); + d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; + + /* + * No locking available for sched_info (and too expensive to add one) + * Mitigate by taking snapshot of values + */ + t1 = tsk->sched_info.pcnt; + t2 = tsk->sched_info.run_delay; + t3 = tsk->sched_info.cpu_time; + + d->cpu_count += t1; + + jiffies_to_timespec(t2, &ts); + tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts); + d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; + + tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000; + d->cpu_run_virtual_total = + (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; + + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ + + spin_lock(&tsk->delays->lock); + tmp = d->blkio_delay_total + tsk->delays->blkio_delay; + d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; + tmp = d->swapin_delay_total + tsk->delays->swapin_delay; + d->swapin_delay_total = (tmp < d->swapin_delay_total) ?
0 : tmp; + d->blkio_count += tsk->delays->blkio_count; + d->swapin_count += tsk->delays->swapin_count; + spin_unlock(&tsk->delays->lock); + +done: + spin_unlock(&tsk->delays_lock); + return 0; +} diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 82ec9137d908..ea9506de3b85 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -18,13 +18,13 @@ #include #include +#include <linux/delayacct.h> #include #include static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; static int family_registered; kmem_cache_t *taskstats_cache; -static DEFINE_MUTEX(taskstats_exit_mutex); static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, * goto err; */ -err: + rc = delayacct_add_tsk(stats, tsk); + stats->version = TASKSTATS_VERSION; + + /* Define err: label here if needed */ put_task_struct(tsk); return rc; @@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, * break; */ + rc = delayacct_add_tsk(stats, tsk); + if (rc) + break; + } while_each_thread(first, tsk); read_unlock(&tasklist_lock); + stats->version = TASKSTATS_VERSION; + /* * Accounting subsytems can also add calls here if they don't @@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, if (!family_registered || !tidstats) return; - mutex_lock(&taskstats_exit_mutex); - is_thread_group = !thread_group_empty(tsk); rc = 0; @@ -292,7 +299,6 @@ nla_put_failure: err_skb: nlmsg_free(rep_skb); ret: - mutex_unlock(&taskstats_exit_mutex); return; } -- cgit v1.2.3 From 25890454667b3295f67b3372352be90705f8667c Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:43 -0700 Subject: [PATCH] per-task-delay-accounting: /proc export of aggregated block I/O delays Export I/O delays seen by a task through /proc/<tgid>/stats for use in top etc.
Note that delays for I/O done for swapping in pages (swapin I/O) is clubbed together with all other I/O here (this is not the case in the netlink interface where the swapin I/O is kept distinct) [akpm@osdl.org: printk warning fix] Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 6 ++++-- include/linux/delayacct.h | 10 ++++++++++ kernel/delayacct.c | 12 ++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/fs/proc/array.c b/fs/proc/array.c index 7495d3e20775..0b615d62a159 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -411,7 +412,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ -%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", task->pid, tcomm, state, @@ -455,7 +456,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) task->exit_signal, task_cpu(task), task->rt_priority, - task->policy); + task->policy, + (unsigned long long)delayacct_blkio_ticks(task)); if(mm) mmput(mm); return res; diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index d955078a1441..7e8b6011b8f3 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -37,6 +37,7 @@ extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(void); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); +extern __u64 __delayacct_blkio_ticks(struct task_struct *); static inline void delayacct_set_flag(int flag) { @@ -86,6 +87,13 @@ static 
inline int delayacct_add_tsk(struct taskstats *d, return __delayacct_add_tsk(d, tsk); } +static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) +{ + if (tsk->delays) + return __delayacct_blkio_ticks(tsk); + return 0; +} + #else static inline void delayacct_set_flag(int flag) {} @@ -104,6 +112,8 @@ static inline void delayacct_blkio_end(void) static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { return 0; } +static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) +{ return 0; } #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 1be274a462ca..f05392d64267 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -164,3 +164,15 @@ done: spin_unlock(&tsk->delays_lock); return 0; } + +__u64 __delayacct_blkio_ticks(struct task_struct *tsk) +{ + __u64 ret; + + spin_lock(&tsk->delays->lock); + ret = nsec_to_clock_t(tsk->delays->blkio_delay + + tsk->delays->swapin_delay); + spin_unlock(&tsk->delays->lock); + return ret; +} + -- cgit v1.2.3 From 7d94dddd438bcba97db44f120da39bb001b5249f Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Sun, 30 Jul 2006 03:03:10 -0700 Subject: [PATCH] make taskstats sending completely independent of delay accounting on/off status Complete the separation of delay accounting and taskstats by ignoring the return value of delay accounting functions that fill in parts of taskstats before it is sent out (either in response to a command or as part of a task exit). Also make delayacct_add_tsk return silently when delay accounting is turned off rather than treat it as an error. 
Signed-off-by: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 4 +--- kernel/taskstats.c | 8 +++----- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 7e8b6011b8f3..8a284cc6fd5f 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -80,9 +80,7 @@ static inline void delayacct_blkio_end(void) static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { - if (likely(!delayacct_on)) - return -EINVAL; - if (!tsk->delays) + if (likely(!delayacct_on) || !tsk->delays) return 0; return __delayacct_add_tsk(d, tsk); } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index f45179ce028e..b4c737a11408 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -177,7 +177,7 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) static int fill_pid(pid_t pid, struct task_struct *pidtsk, struct taskstats *stats) { - int rc; + int rc = 0; struct task_struct *tsk = pidtsk; if (!pidtsk) { @@ -196,12 +196,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, * Each accounting subsystem adds calls to its functions to * fill in relevant parts of struct taskstsats as follows * - * rc = per-task-foo(stats, tsk); - * if (rc) - * goto err; + * per-task-foo(stats, tsk); */ - rc = delayacct_add_tsk(stats, tsk); + delayacct_add_tsk(stats, tsk); stats->version = TASKSTATS_VERSION; /* Define err: label here if needed */ -- cgit v1.2.3 From 163ecdff060f2fa9e8f5238882fd0137493556a6 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Sun, 30 Jul 2006 03:03:11 -0700 Subject: [PATCH] delay accounting: temporarily enable by default Enable delay accounting by default so that feature gets coverage testing without requiring special measures. Earlier, it was off by default and had to be enabled via a boot time param. 
This patch reverses the default behaviour to improve coverage testing. It can be removed late in the kernel development cycle if its believed users shouldn't have to incur any cost if they don't want delay accounting. Or it can be retained forever if the utility of the stats is deemed common enough to warrant keeping the feature on. Signed-off-by: Shailabh Nagar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/delay-accounting.txt | 10 ++++++---- Documentation/kernel-parameters.txt | 4 ++-- include/linux/delayacct.h | 4 ++-- kernel/delayacct.c | 8 ++++---- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt index be215e58423b..1443cd71d263 100644 --- a/Documentation/accounting/delay-accounting.txt +++ b/Documentation/accounting/delay-accounting.txt @@ -64,11 +64,13 @@ Compile the kernel with CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y -Enable the accounting at boot time by adding -the following to the kernel boot options - delayacct +Delay accounting is enabled by default at boot up. +To disable, add + nodelayacct +to the kernel boot options. The rest of the instructions +below assume this has not been done. -and after the system has booted up, use a utility +After the system has booted up, use a utility similar to getdelays.c to access the delays seen by a given task or a task group (tgid). The utility also allows a given command to be diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e11f7728ec6f..b50595a0550f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -448,8 +448,6 @@ running once the system is up. Format: [,] See also Documentation/networking/decnet.txt. - delayacct [KNL] Enable per-task delay accounting - dhash_entries= [KNL] Set number of hash buckets for dentry cache. 
@@ -1031,6 +1029,8 @@ running once the system is up. nocache [ARM] + nodelayacct [KNL] Disable per-task delay accounting + nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. noexec [IA-64] diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 8a284cc6fd5f..11487b6e7127 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -55,7 +55,7 @@ static inline void delayacct_tsk_init(struct task_struct *tsk) { /* reinitialize in case parent's non-null pointer was dup'ed*/ tsk->delays = NULL; - if (unlikely(delayacct_on)) + if (delayacct_on) __delayacct_tsk_init(tsk); } @@ -80,7 +80,7 @@ static inline void delayacct_blkio_end(void) static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { - if (likely(!delayacct_on) || !tsk->delays) + if (!delayacct_on || !tsk->delays) return 0; return __delayacct_add_tsk(d, tsk); } diff --git a/kernel/delayacct.c b/kernel/delayacct.c index f05392d64267..57ca3730205d 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -19,15 +19,15 @@ #include #include -int delayacct_on __read_mostly; /* Delay accounting turned on/off */ +int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ kmem_cache_t *delayacct_cache; -static int __init delayacct_setup_enable(char *str) +static int __init delayacct_setup_disable(char *str) { - delayacct_on = 1; + delayacct_on = 0; return 1; } -__setup("delayacct", delayacct_setup_enable); +__setup("nodelayacct", delayacct_setup_disable); void delayacct_init(void) { -- cgit v1.2.3 From 35df17c57cecb08f0120fb18926325f1093dc429 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Thu, 31 Aug 2006 21:27:38 -0700 Subject: [PATCH] task delay accounting fixes Cleanup allocation and freeing of tsk->delays used by delay accounting. This solves two problems reported for delay accounting: 1. 
oops in __delayacct_blkio_ticks http://www.uwsg.indiana.edu/hypermail/linux/kernel/0608.2/1844.html Currently tsk->delays is getting freed too early in task exit which can cause a NULL tsk->delays to get accessed via reading of /proc/<tgid>/stats. The patch fixes this problem by freeing tsk->delays closer to when task_struct itself is freed up. As a result, it also eliminates the use of tsk->delays_lock which was only being used (inadequately) to safeguard access to tsk->delays while a task was exiting. 2. Possible memory leak in kernel/delayacct.c http://www.uwsg.indiana.edu/hypermail/linux/kernel/0608.2/1389.html The patch cleans up tsk->delays allocations after a bad fork which was missing earlier. The patch has been tested to fix the problems listed above and stress tested with rapid calls to delay accounting's taskstats command interface (which is the other path that can access the same data, besides the /proc interface causing the oops above). Signed-off-by: Shailabh Nagar Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delayacct.h | 10 +++++++--- include/linux/sched.h | 1 - kernel/delayacct.c | 16 ---------------- kernel/exit.c | 1 - kernel/fork.c | 6 ++++-- 5 files changed, 11 insertions(+), 23 deletions(-) (limited to 'include/linux/delayacct.h') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 11487b6e7127..561e2a77805c 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -59,10 +59,14 @@ static inline void delayacct_tsk_init(struct task_struct *tsk) __delayacct_tsk_init(tsk); } -static inline void delayacct_tsk_exit(struct task_struct *tsk) +/* Free tsk->delays.
Called from bad fork and __put_task_struct + * where there's no risk of tsk->delays being accessed elsewhere + */ +static inline void delayacct_tsk_free(struct task_struct *tsk) { if (tsk->delays) - __delayacct_tsk_exit(tsk); + kmem_cache_free(delayacct_cache, tsk->delays); + tsk->delays = NULL; } static inline void delayacct_blkio_start(void) @@ -101,7 +105,7 @@ static inline void delayacct_init(void) {} static inline void delayacct_tsk_init(struct task_struct *tsk) {} -static inline void delayacct_tsk_exit(struct task_struct *tsk) +static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index 6674fc1e51bf..34ed0d99b1bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -994,7 +994,6 @@ struct task_struct { */ struct pipe_inode_info *splice_pipe; #ifdef CONFIG_TASK_DELAY_ACCT - spinlock_t delays_lock; struct task_delay_info *delays; #endif }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 57ca3730205d..36752f124c6a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -41,24 +41,11 @@ void delayacct_init(void) void __delayacct_tsk_init(struct task_struct *tsk) { - spin_lock_init(&tsk->delays_lock); - /* No need to acquire tsk->delays_lock for allocation here unless - __delayacct_tsk_init called after tsk is attached to tasklist - */ tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); if (tsk->delays) spin_lock_init(&tsk->delays->lock); } -void __delayacct_tsk_exit(struct task_struct *tsk) -{ - struct task_delay_info *delays = tsk->delays; - spin_lock(&tsk->delays_lock); - tsk->delays = NULL; - spin_unlock(&tsk->delays_lock); - kmem_cache_free(delayacct_cache, delays); -} - /* * Start accounting for a delay statistic using * its starting timestamp (@start) @@ -118,8 +105,6 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) struct timespec ts; unsigned long t1,t2,t3; - 
spin_lock(&tsk->delays_lock); - /* Though tsk->delays accessed later, early exit avoids * unnecessary returning of other data */ @@ -161,7 +146,6 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) spin_unlock(&tsk->delays->lock); done: - spin_unlock(&tsk->delays_lock); return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index dba194a8d416..a4c19a52ce46 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -908,7 +908,6 @@ fastcall NORET_TYPE void do_exit(long code) audit_free(tsk); taskstats_exit_send(tsk, tidstats, group_dead, mycpu); taskstats_exit_free(tidstats); - delayacct_tsk_exit(tsk); exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index aa36c43783cc..f9b014e3e700 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -117,6 +117,7 @@ void __put_task_struct(struct task_struct *tsk) security_task_free(tsk); free_uid(tsk->user); put_group_info(tsk->group_info); + delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) free_task(tsk); @@ -1011,7 +1012,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) - goto bad_fork_cleanup; + goto bad_fork_cleanup_delays_binfmt; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); @@ -1277,7 +1278,8 @@ bad_fork_cleanup_policy: bad_fork_cleanup_cpuset: #endif cpuset_exit(p); -bad_fork_cleanup: +bad_fork_cleanup_delays_binfmt: + delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: -- cgit v1.2.3