From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris --- include/linux/init_task.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..1f277204de34 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,13 +83,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow CAP_SETPCAP to - * be available in the default configuration. - */ -# define CAP_INIT_BSET CAP_FULL_SET - #ifdef CONFIG_RCU_BOOST #define INIT_TASK_RCU_BOOST() \ .rcu_boost_mutex = NULL, -- cgit v1.2.3 From 625f2a378e5a10f45fdc37932fc9f8a21676de9e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 22 Apr 2011 11:19:10 -0600 Subject: sched: Get rid of lock_depth Neil Brown pointed out that lock_depth somehow escaped the BKL removal work. Let's get rid of it now. Note that the perf scripting utilities still have a bunch of code for dealing with common_lock_depth in tracepoints; I have left that in place in case anybody wants to use that code with older kernels. Suggested-by: Neil Brown Signed-off-by: Jonathan Corbet Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20110422111910.456c0e84@bike.lwn.net Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 1 - include/linux/init_task.h | 1 - include/linux/sched.h | 6 ------ kernel/fork.c | 1 - kernel/mutex.c | 7 ------- kernel/sched.c | 11 +---------- kernel/sched_debug.c | 4 ---- kernel/trace/trace_kprobe.c | 1 - tools/perf/Documentation/perf-script-perl.txt | 1 - tools/perf/Documentation/perf-script-python.txt | 1 - 10 files changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux/init_task.h') diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 6d27ab8d6e9f..c83bd6b4e6e8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -120,7 +120,6 @@ format: field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; - field:int common_lock_depth; offset:8; size:4; signed:1; field:unsigned long __probe_ip; offset:12; size:4; signed:0; field:int __probe_nargs; offset:16; size:4; signed:1; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..689496bb6654 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -134,7 +134,6 @@ extern struct cred init_cred; .stack = &init_thread_info, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ - .lock_depth = -1, \ .prio = MAX_PRIO-20, \ .static_prio = MAX_PRIO-20, \ .normal_prio = MAX_PRIO-20, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 171ba24b08a7..013314a56105 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -731,10 +731,6 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -#ifdef CONFIG_SCHEDSTATS - /* BKL stats */ - unsigned int bkl_count; -#endif }; #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ @@ -1190,8 +1186,6 @@ struct task_struct { unsigned int flags; /* per process flags, defined below */ unsigned int ptrace; - int lock_depth; /* BKL lock depth */ - #ifdef CONFIG_SMP struct task_struct *wake_entry; int on_cpu; diff --git a/kernel/fork.c b/kernel/fork.c index e7548dee636b..aca62871a4f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1103,7 +1103,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, posix_cpu_timers_init(p); - p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); diff --git a/kernel/mutex.c b/kernel/mutex.c index fe4706cb0c5b..2c938e2337cd 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -162,13 +162,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - /* - * If we own the BKL, then don't spin. The owner of - * the mutex might be waiting on us to release the BKL. - */ - if (unlikely(current->lock_depth >= 0)) - break; - /* * If there's an owner, wait for it to either * release the lock or go to sleep. diff --git a/kernel/sched.c b/kernel/sched.c index 8cb0a5769a16..9cde2dd229c9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4121,12 +4121,6 @@ static inline void schedule_debug(struct task_struct *prev) profile_hit(SCHED_PROFILING, __builtin_return_address(0)); schedstat_inc(this_rq(), sched_count); -#ifdef CONFIG_SCHEDSTATS - if (unlikely(prev->lock_depth >= 0)) { - schedstat_inc(this_rq(), rq_sched_info.bkl_count); - schedstat_inc(prev, sched_info.bkl_count); - } -#endif } static void put_prev_task(struct rq *rq, struct task_struct *prev) @@ -5852,11 +5846,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ -#if defined(CONFIG_PREEMPT) - task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); -#else task_thread_info(idle)->preempt_count = 0; -#endif + /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3669bec6e130..a6710a112b4f 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -296,9 +296,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(ttwu_count); P(ttwu_local); - SEQ_printf(m, " .%-30s: %d\n", "bkl_count", - rq->rq_sched_info.bkl_count); - #undef P #undef P64 #endif @@ -441,7 +438,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.statistics.wait_count); PN(se.statistics.iowait_sum); P(se.statistics.iowait_count); - P(sched_info.bkl_count); P(se.nr_migrations); P(se.statistics.nr_migrations_cold); P(se.statistics.nr_failed_migrations_affine); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 35d55a386145..f925c45f0afa 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -53,7 +53,6 @@ const char *reserved_field_names[] = { "common_preempt_count", "common_pid", "common_tgid", - "common_lock_depth", FIELD_STRING_IP, FIELD_STRING_RETIP, FIELD_STRING_FUNC, diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 5bb41e55a3ac..3152cca15501 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -63,7 +63,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 36b38277422c..471022069119 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -463,7 +463,6 @@ The format file for the sched_wakep event defines the following fields field:unsigned char common_flags; field:unsigned char common_preempt_count; field:int common_pid; - field:int common_lock_depth; field:char comm[TASK_COMM_LEN]; field:pid_t pid; -- cgit v1.2.3 From 4714d1d32d97239fb5ae3e10521d3f133a899b66 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Thu, 26 May 2011 16:25:18 -0700 Subject: cgroups: read-write lock CLONE_THREAD forking per threadgroup Adds functionality to read/write lock CLONE_THREAD fork()ing per-threadgroup Add an rwsem that lives in a threadgroup's signal_struct that's taken for reading in the fork path, under CONFIG_CGROUPS. If another part of the kernel later wants to use such a locking mechanism, the CONFIG_CGROUPS ifdefs should be changed to a higher-up flag that CGROUPS and the other system would both depend on. This is a pre-patch for cgroup-procs-write.patch. Signed-off-by: Ben Blum Cc: "Eric W. Biederman" Cc: Li Zefan Cc: Matt Helsley Reviewed-by: Paul Menage Cc: Oleg Nesterov Cc: David Rientjes Cc: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 9 +++++++++ include/linux/sched.h | 36 ++++++++++++++++++++++++++++++++++++ kernel/fork.c | 10 ++++++++++ 3 files changed, 55 insertions(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bafc58c00fc3..580f70c02391 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -22,6 +22,14 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +#ifdef CONFIG_CGROUPS +#define INIT_THREADGROUP_FORK_LOCK(sig) \ + .threadgroup_fork_lock = \ + __RWSEM_INITIALIZER(sig.threadgroup_fork_lock), +#else +#define INIT_THREADGROUP_FORK_LOCK(sig) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ @@ -38,6 +46,7 @@ extern struct fs_struct init_fs; }, \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ + INIT_THREADGROUP_FORK_LOCK(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index f18300eddfcb..dc8871295a5a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -513,6 +513,7 @@ struct thread_group_cputimer { spinlock_t lock; }; +#include struct autogroup; /* @@ -632,6 +633,16 @@ struct signal_struct { unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + /* + * The threadgroup_fork_lock prevents threads from forking with + * CLONE_THREAD while held for writing. Use this for fork-sensitive + * threadgroup-wide operations. It's taken for reading in fork.c in + * copy_process(). + * Currently only needed write-side by cgroups. + */ + struct rw_semaphore threadgroup_fork_lock; +#endif int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ @@ -2323,6 +2334,31 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } +/* See the declaration of threadgroup_fork_lock in signal_struct. */ +#ifdef CONFIG_CGROUPS +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) +{ + down_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) +{ + up_read(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) +{ + down_write(&tsk->signal->threadgroup_fork_lock); +} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) +{ + up_write(&tsk->signal->threadgroup_fork_lock); +} +#else +static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {} +static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {} +#endif + #ifndef __HAVE_THREAD_FUNCTIONS #define task_thread_info(task) ((struct thread_info *)(task)->stack) diff --git a/kernel/fork.c b/kernel/fork.c index 8e7e135d0817..1fa9d940e301 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -957,6 +957,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); +#ifdef CONFIG_CGROUPS + init_rwsem(&sig->threadgroup_fork_lock); +#endif + sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; @@ -1138,6 +1142,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_lock(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1342,6 +1348,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_unlock(current); perf_event_fork(p); return p; @@ -1380,6 +1388,8 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif + if (clone_flags & CLONE_THREAD) + threadgroup_fork_read_unlock(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); -- cgit v1.2.3