Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 92
-rw-r--r-- | kernel/exit.c | 2
-rw-r--r-- | kernel/fork.c | 9
-rw-r--r-- | kernel/futex.c | 5
-rw-r--r-- | kernel/irq/manage.c | 1
-rw-r--r-- | kernel/kprobes.c | 134
-rw-r--r-- | kernel/module.c | 1
-rw-r--r-- | kernel/posix-cpu-timers.c | 6
-rw-r--r-- | kernel/power/snapshot.c | 19
-rw-r--r-- | kernel/power/swsusp.c | 45
-rw-r--r-- | kernel/printk.c | 1
-rw-r--r-- | kernel/ptrace.c | 82
-rw-r--r-- | kernel/sched.c | 5
-rw-r--r-- | kernel/softirq.c | 3
-rw-r--r-- | kernel/softlockup.c | 3
-rw-r--r-- | kernel/sys.c | 26
-rw-r--r-- | kernel/sysctl.c | 141
-rw-r--r-- | kernel/workqueue.c | 2
18 files changed, 385 insertions, 192 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2e3f4a47e7d0..6312d6bd43e3 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -54,6 +54,7 @@ #include <linux/jiffies.h> #include <linux/times.h> #include <linux/syscalls.h> +#include <linux/mount.h> #include <asm/uaccess.h> #include <asm/div64.h> #include <linux/blkdev.h> /* sector_div */ @@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file) add_timer(&acct_globals.timer); } if (old_acct) { + mnt_unpin(old_acct->f_vfsmnt); spin_unlock(&acct_globals.lock); do_acct_process(0, old_acct); filp_close(old_acct, NULL); @@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file) } } +static int acct_on(char *name) +{ + struct file *file; + int error; + + /* Difference from BSD - they don't do O_APPEND */ + file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + filp_close(file, NULL); + return -EACCES; + } + + if (!file->f_op->write) { + filp_close(file, NULL); + return -EIO; + } + + error = security_acct(file); + if (error) { + filp_close(file, NULL); + return error; + } + + spin_lock(&acct_globals.lock); + mnt_pin(file->f_vfsmnt); + acct_file_reopen(file); + spin_unlock(&acct_globals.lock); + + mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ + + return 0; +} + /** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting @@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file) */ asmlinkage long sys_acct(const char __user *name) { - struct file *file = NULL; - char *tmp; int error; if (!capable(CAP_SYS_PACCT)) return -EPERM; if (name) { - tmp = getname(name); - if (IS_ERR(tmp)) { + char *tmp = getname(name); + if (IS_ERR(tmp)) return (PTR_ERR(tmp)); - } - /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + error = acct_on(tmp); putname(tmp); - if (IS_ERR(file)) { - return (PTR_ERR(file)); - } - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - filp_close(file, NULL); - return (-EACCES); - } - - if (!file->f_op->write) { - filp_close(file, NULL); - return (-EIO); + } else { + error = security_acct(NULL); + if (!error) { + spin_lock(&acct_globals.lock); + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); } } + return error; +} - error = security_acct(file); - if (error) { - if (file) - filp_close(file, NULL); - return error; - } - +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @m: vfsmount being shut down + * + * If the accounting is turned on for a file in the subtree pointed to + * to by m, turn accounting off. Done when m is about to die. 
+ */ +void acct_auto_close_mnt(struct vfsmount *m) +{ spin_lock(&acct_globals.lock); - acct_file_reopen(file); + if (acct_globals.file && acct_globals.file->f_vfsmnt == m) + acct_file_reopen(NULL); spin_unlock(&acct_globals.lock); - - return (0); } /** @@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb) { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_dentry->d_inode->i_sb == sb) { - acct_file_reopen((struct file *)NULL); + acct_globals.file->f_vfsmnt->mnt_sb == sb) { + acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); } diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e8d..452a1d116178 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -28,6 +28,7 @@ #include <linux/cpuset.h> #include <linux/syscalls.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; + proc_exit_connector(tsk); exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612eac3..158710d22566 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include <linux/profile.h> #include <linux/rmap.h> #include <linux/acct.h> +#include <linux/cn_proc.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -469,13 +470,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; - /* - * There are cases where the PTL is held to ensure no - * new threads start up in user mode using an mm, which - * allows optimizing out ipis; the tlb_gather_mmu code - * is an example. - */ - spin_unlock_wait(&oldmm->page_table_lock); goto good_mm; } @@ -1143,6 +1137,7 @@ static task_t *copy_process(unsigned long clone_flags, __get_cpu_var(process_counts)++; } + proc_fork_connector(p); if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44cc6..aca8d10704f6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -365,6 +365,11 @@ retry: if (bh1 != bh2) spin_unlock(&bh2->lock); + if (unlikely(op_ret != -EFAULT)) { + ret = op_ret; + goto out; + } + /* futex_atomic_op_inuser needs to both read and write * *(int __user *)uaddr2, but we can't modify it * non-atomically. Therefore, if get_user below is not diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf20..3bd7226d15fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * * This function waits for any pending IRQ handlers for this interrupt * to complete before returning. If you use this function while diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd683a..5beda378cc75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -32,7 +32,6 @@ * <prasanna@in.ibm.com> added function-return probes. 
*/ #include <linux/kprobes.h> -#include <linux/spinlock.h> #include <linux/hash.h> #include <linux/init.h> #include <linux/slab.h> @@ -49,9 +48,9 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -unsigned int kprobe_cpu = NR_CPUS; -static DEFINE_SPINLOCK(kprobe_lock); -static struct kprobe *curr_kprobe; +static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; /* * kprobe->ainsn.insn points to the copy of the instruction to be @@ -153,50 +152,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) } } -/* Locks kprobe: irqs must be disabled */ -void __kprobes lock_kprobes(void) +/* We have preemption disabled.. so it is safe to use __ versions */ +static inline void set_kprobe_instance(struct kprobe *kp) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we take the kprobe_lock - * and before we get a chance to update kprobe_cpu, this to prevent - * deadlock when we have a kprobe on ISR routine and a kprobe on task - * routine - */ - local_irq_save(flags); - - spin_lock(&kprobe_lock); - kprobe_cpu = smp_processor_id(); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = kp; } -void __kprobes unlock_kprobes(void) +static inline void reset_kprobe_instance(void) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we update - * kprobe_cpu and before we get a a chance to release kprobe_lock, - * this to prevent deadlock when we have a kprobe on ISR routine and - * a kprobe on task routine - */ - local_irq_save(flags); - - kprobe_cpu = NR_CPUS; - spin_unlock(&kprobe_lock); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = NULL; } -/* You have to be holding the kprobe_lock */ +/* + * This routine is called either: + * - under the kprobe_lock spinlock - during kprobe_[un]register() + * OR + * - with preemption disabled - from arch/xxx/kernel/kprobes.c + */ struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; struct hlist_node *node; + struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each(node, head) { - struct kprobe *p = hlist_entry(node, struct kprobe, hlist); + hlist_for_each_entry_rcu(p, node, head, hlist) { if (p->addr == addr) return p; } @@ -211,13 +191,13 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->pre_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; } - curr_kprobe = NULL; + reset_kprobe_instance(); } return 0; } @@ -227,11 +207,11 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->post_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); - curr_kprobe = NULL; + reset_kprobe_instance(); } } return; @@ -240,12 +220,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) { + struct kprobe *cur = __get_cpu_var(kprobe_instance); + /* * if we faulted "during" the execution of a user specified * probe handler, 
invoke just that probe's fault handler */ - if (curr_kprobe && curr_kprobe->fault_handler) { - if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) + if (cur && cur->fault_handler) { + if (cur->fault_handler(cur, regs, trapnr)) return 1; } return 0; @@ -253,17 +235,18 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { - struct kprobe *kp = curr_kprobe; - if (curr_kprobe && kp->break_handler) { - if (kp->break_handler(kp, regs)) { - curr_kprobe = NULL; - return 1; - } + struct kprobe *cur = __get_cpu_var(kprobe_instance); + int ret = 0; + + if (cur && cur->break_handler) { + if (cur->break_handler(cur, regs)) + ret = 1; } - curr_kprobe = NULL; - return 0; + reset_kprobe_instance(); + return ret; } +/* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; @@ -273,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) return NULL; } +/* Called with kretprobe_lock held */ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe *rp) { @@ -283,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe return NULL; } +/* Called with kretprobe_lock held */ void __kprobes add_rp_inst(struct kretprobe_instance *ri) { /* @@ -301,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) hlist_add_head(&ri->uflist, &ri->rp->used_instances); } +/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) { /* remove rp inst off the rprobe_inst_table */ @@ -334,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct hlist_node *node, *tmp; unsigned long flags = 0; - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } /* @@ -351,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); + unsigned long flags = 0; /*TODO: consider to only swap the RA after the last pre_handler fired */ + spin_lock_irqsave(&kretprobe_lock, flags); arch_prepare_kretprobe(rp, regs); + spin_unlock_irqrestore(&kretprobe_lock, flags); return 0; } @@ -384,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) struct kprobe *kp; if (p->break_handler) { - list_for_each_entry(kp, &old_p->list, list) { + list_for_each_entry_rcu(kp, &old_p->list, list) { if (kp->break_handler) return -EEXIST; } - list_add_tail(&p->list, &old_p->list); + list_add_tail_rcu(&p->list, &old_p->list); } else - list_add(&p->list, &old_p->list); + list_add_rcu(&p->list, &old_p->list); return 0; } @@ -408,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); - list_add(&p->list, &ap->list); + list_add_rcu(&p->list, &ap->list); INIT_HLIST_NODE(&ap->hlist); - hlist_del(&p->hlist); - hlist_add_head(&ap->hlist, + hlist_del_rcu(&p->hlist); + hlist_add_head_rcu(&ap->hlist, &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); } /* * This is the second or subsequent kprobe at the address - handle * the 
intricacies - * TODO: Move kcalloc outside the spinlock + * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -445,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) { arch_disarm_kprobe(p); - hlist_del(&p->hlist); + hlist_del_rcu(&p->hlist); spin_unlock_irqrestore(&kprobe_lock, flags); arch_remove_kprobe(p); } @@ -453,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) static inline void cleanup_aggr_kprobe(struct kprobe *old_p, struct kprobe *p, unsigned long flags) { - list_del(&p->list); - if (list_empty(&old_p->list)) { + list_del_rcu(&p->list); + if (list_empty(&old_p->list)) cleanup_kprobe(old_p, flags); - kfree(old_p); - } else + else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -480,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; + p->nmissed = 0; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); - p->nmissed = 0; if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; @@ -490,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) arch_copy_kprobe(p); INIT_HLIST_NODE(&p->hlist); - hlist_add_head(&p->hlist, + hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); arch_arm_kprobe(p); @@ -511,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); if (old_p) { + /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ if (old_p->pre_handler == aggr_pre_handler) cleanup_aggr_kprobe(old_p, p, flags); else cleanup_kprobe(p, flags); + + synchronize_sched(); + if (old_p->pre_handler == aggr_pre_handler && + list_empty(&old_p->list)) + kfree(old_p); } else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -591,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) unregister_kprobe(&rp->kp); /* No race here */ - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); free_rp_inst(rp); while ((ri = get_used_rp_inst(rp)) != NULL) { ri->rp = NULL; hlist_del(&ri->uflist); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } static int __init init_kprobes(void) diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab625..2ea929d51ad0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -37,6 +37,7 @@ #include <linux/stop_machine.h> #include <linux/device.h> #include <linux/string.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include <asm/semaphore.h> #include <asm/cacheflush.h> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a894264941..84af54c39e1b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(prof_ticks(t), left); if (cputime_eq(t->it_prof_expires, cputime_zero) || @@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(virt_ticks(t), left); if (cputime_eq(t->it_virt_expires, 
cputime_zero) || @@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, nsleft = expires.sched - val.sched; do_div(nsleft, nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ns = t->sched_time + nsleft; if (t->it_sched_expires == 0 || t->it_sched_expires > ns) { diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a628704398..723f5179883e 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -168,9 +168,8 @@ static unsigned count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; - unsigned n; + unsigned int n = 0; - n = 0; for_each_zone (zone) { if (is_highmem(zone)) continue; @@ -250,10 +249,10 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned nr_pages) +void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; - unsigned num = PBES_PER_PAGE; + unsigned int num = PBES_PER_PAGE; for_each_pb_page (pbpage, pblist) { if (num >= nr_pages) @@ -293,9 +292,9 @@ static void *alloc_image_page(void) * On each page we set up a list of struct_pbe elements. */ -struct pbe *alloc_pagedir(unsigned nr_pages) +struct pbe *alloc_pagedir(unsigned int nr_pages) { - unsigned num; + unsigned int num; struct pbe *pblist, *pbe; if (!nr_pages) @@ -329,7 +328,7 @@ void swsusp_free(void) for_each_zone(zone) { for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page * page; + struct page *page; page = pfn_to_page(zone_pfn + zone->zone_start_pfn); if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); @@ -348,7 +347,7 @@ void swsusp_free(void) * free pages. */ -static int enough_free_mem(unsigned nr_pages) +static int enough_free_mem(unsigned int nr_pages) { pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); return nr_free_pages() > (nr_pages + PAGES_FOR_IO + @@ -356,7 +355,7 @@ static int enough_free_mem(unsigned nr_pages) } -static struct pbe *swsusp_alloc(unsigned nr_pages) +static struct pbe *swsusp_alloc(unsigned int nr_pages) { struct pbe *pblist, *p; @@ -380,7 +379,7 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) asmlinkage int swsusp_save(void) { - unsigned nr_pages; + unsigned int nr_pages; pr_debug("swsusp: critical section: \n"); if (save_highmem()) { diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad61f..e1ab28b9b217 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -85,18 +85,11 @@ unsigned int nr_copy_pages __nosavedata = 0; /* Suspend pagedir is allocated before final copy, therefore it must be freed after resume - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - Warning: this is even more evil than it seems. Pagedirs this file talks about are completely different from page directories used by MMU hardware. 
*/ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -suspend_pagedir_t *pagedir_save; #define SWSUSP_SIG "S1SUSPEND" @@ -122,8 +115,8 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -static int write_page(unsigned long addr, swp_entry_t * loc); -static int bio_read_page(pgoff_t page_off, void * page); +static int write_page(unsigned long addr, swp_entry_t *loc); +static int bio_read_page(pgoff_t page_off, void *page); static u8 key_iv[MAXKEY+MAXIV]; @@ -355,7 +348,7 @@ static void lock_swapdevices(void) * This is a partial improvement, since we will at least return other * errors, though we need to eventually fix the damn code. */ -static int write_page(unsigned long addr, swp_entry_t * loc) +static int write_page(unsigned long addr, swp_entry_t *loc) { swp_entry_t entry; int error = 0; @@ -383,9 +376,9 @@ static int write_page(unsigned long addr, swp_entry_t * loc) static void data_free(void) { swp_entry_t entry; - struct pbe * p; + struct pbe *p; - for_each_pbe(p, pagedir_nosave) { + for_each_pbe (p, pagedir_nosave) { entry = p->swap_address; if (entry.val) swap_free(entry); @@ -492,8 +485,8 @@ static void free_pagedir_entries(void) static int write_pagedir(void) { int error = 0; - unsigned n = 0; - struct pbe * pbe; + unsigned int n = 0; + struct pbe *pbe; printk( "Writing pagedir..."); for_each_pb_page (pbe, pagedir_nosave) { @@ -543,7 +536,7 @@ static int write_suspend_image(void) * We should only consider resume_device. */ -int enough_swap(unsigned nr_pages) +int enough_swap(unsigned int nr_pages) { struct sysinfo i; @@ -694,7 +687,7 @@ static int check_pagedir(struct pbe *pblist) * restore from the loaded pages later. We relocate them here. */ -static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) +static struct pbe *swsusp_pagedir_relocate(struct pbe *pblist) { struct zone *zone; unsigned long zone_pfn; @@ -770,7 +763,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) static atomic_t io_done = ATOMIC_INIT(0); -static int end_io(struct bio * bio, unsigned int num, int err) +static int end_io(struct bio *bio, unsigned int num, int err) { if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) panic("I/O error reading memory image"); @@ -778,7 +771,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) return 0; } -static struct block_device * resume_bdev; +static struct block_device *resume_bdev; /** * submit - submit BIO request. @@ -791,10 +784,10 @@ static struct block_device * resume_bdev; * Then submit it and wait. */ -static int submit(int rw, pgoff_t page_off, void * page) +static int submit(int rw, pgoff_t page_off, void *page) { int error = 0; - struct bio * bio; + struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); if (!bio) @@ -823,12 +816,12 @@ static int submit(int rw, pgoff_t page_off, void * page) return error; } -static int bio_read_page(pgoff_t page_off, void * page) +static int bio_read_page(pgoff_t page_off, void *page) { return submit(READ, page_off, page); } -static int bio_write_page(pgoff_t page_off, void * page) +static int bio_write_page(pgoff_t page_off, void *page) { return submit(WRITE, page_off, page); } @@ -838,7 +831,7 @@ static int bio_write_page(pgoff_t page_off, void * page) * I really don't think that it's foolproof but more than nothing.. 
*/ -static const char * sanity_check(void) +static const char *sanity_check(void) { dump_info(); if (swsusp_info.version_code != LINUX_VERSION_CODE) @@ -864,7 +857,7 @@ static const char * sanity_check(void) static int check_header(void) { - const char * reason = NULL; + const char *reason = NULL; int error; if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) @@ -912,7 +905,7 @@ static int check_sig(void) static int data_read(struct pbe *pblist) { - struct pbe * p; + struct pbe *p; int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; @@ -950,7 +943,7 @@ static int data_read(struct pbe *pblist) static int read_pagedir(struct pbe *pblist) { struct pbe *pbpage, *p; - unsigned i = 0; + unsigned int i = 0; int error; if (!pblist) diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209bc..e9be027bc930 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -806,7 +806,6 @@ void console_unblank(void) c->unblank(); release_console_sem(); } -EXPORT_SYMBOL(console_unblank); /* * Return the console tty driver structure and its associated index diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff47..5b8dd98a230e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, return ret; } + +#ifndef __ARCH_SYS_PTRACE +static int ptrace_get_task_struct(long request, long pid, + struct task_struct **childp) +{ + struct task_struct *child; + int ret; + + /* + * Callers use child == NULL as an indication to exit early even + * when the return value is 0, so make sure it is non-NULL here. + */ + *childp = NULL; + + if (request == PTRACE_TRACEME) { + /* + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. + */ + current->ptrace |= PT_PTRACED; + return 0; + } + + /* + * You may not mess with init + */ + if (pid == 1) + return -EPERM; + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + return -ESRCH; + + *childp = child; + return 0; +} + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + ret = ptrace_get_task_struct(request, pid, &child); + if (!child) + goto out; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_put_task_struct; + + ret = arch_ptrace(child, request, addr, data); + if (ret < 0) + goto out_put_task_struct; + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_SYS_PTRACE */ diff --git a/kernel/sched.c b/kernel/sched.c index b4f4eb613537..3ce26954be12 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3563,8 +3563,6 @@ int idle_cpu(int cpu) return cpu_curr(cpu) == cpu_rq(cpu)->idle; } -EXPORT_SYMBOL_GPL(idle_cpu); - /** * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. @@ -4680,7 +4678,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind it from offline cpu so it can run. Fall thru. 
*/ - kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); + kthread_bind(cpu_rq(cpu)->migration_thread, + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48be..ad3295cdded5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(ksoftirqd, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209cea7..a2dcceb9437d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -123,7 +123,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(watchdog_task, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed18123c..c43b3e22bbda 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include <linux/suspend.h> #include <linux/tty.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -375,18 +376,21 @@ void emergency_restart(void) } EXPORT_SYMBOL_GPL(emergency_restart); -/** - * kernel_restart - reboot the system - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); } + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. 
+ */ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); @@ -623,6 +627,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->egid = new_egid; current->gid = new_rgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -662,6 +667,7 @@ asmlinkage long sys_setgid(gid_t gid) return -EPERM; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -751,6 +757,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->fsuid = current->euid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -798,6 +805,7 @@ asmlinkage long sys_setuid(uid_t uid) current->suid = new_suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -846,6 +854,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) current->suid = suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -898,6 +907,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->sgid = sgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -940,6 +950,7 @@ asmlinkage long sys_setfsuid(uid_t uid) } key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); @@ -968,6 +979,7 @@ asmlinkage long sys_setfsgid(gid_t gid) } current->fsgid = gid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); } return old_fsgid; } @@ -1485,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -EXPORT_SYMBOL(uts_sem); - asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e2495542..9990e10192e8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { extern struct proc_dir_entry *proc_sys_root; -static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif @@ -952,7 +952,7 @@ static ctl_table fs_table[] = { .data = &aio_nr, .maxlen = sizeof(aio_nr), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .ctl_name = FS_AIO_MAX_NR, @@ -960,7 +960,7 @@ static ctl_table fs_table[] = { .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, #ifdef CONFIG_INOTIFY { @@ -992,10 +992,51 @@ static ctl_table dev_table[] = { extern void init_irq_proc (void); +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll 
eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + void __init sysctl_init(void) { #ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root); + register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } @@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol void __user *newval, size_t newlen) { struct list_head *tmp; + int error = -ENOTDIR; if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; @@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } + spin_lock(&sysctl_lock); tmp = &root_table_header.ctl_entry; do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, + + if (!use_table(head)) + continue; + + spin_unlock(&sysctl_lock); + + error = parse_table(name, nlen, oldval, oldlenp, newval, newlen, head->ctl_table, &context); kfree(context); + + spin_lock(&sysctl_lock); + unuse_table(head); if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; + break; + } while ((tmp = tmp->next) != &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); + return error; } asmlinkage long sys_sysctl(struct __sysctl_args __user *args) @@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); #ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + register_proc_table(table, proc_sys_root, tmp); #endif return tmp; } @@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, */ void unregister_sysctl_table(struct ctl_table_header * header) { - list_del(&header->ctl_entry); + might_sleep(); + spin_lock(&sysctl_lock); + start_unregistering(header); #ifdef CONFIG_PROC_FS unregister_proc_table(header->ctl_table, proc_sys_root); #endif + spin_unlock(&sysctl_lock); kfree(header); } @@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) #ifdef CONFIG_PROC_FS /* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) { struct proc_dir_entry *de; int len; @@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) de = create_proc_entry(table->procname, mode, root); if (!de) continue; + de->set = set; de->data = (void *) table; if (table->proc_handler) de->proc_fops = &proc_sys_file_operations; } table->de = de; if (de->mode & S_IFDIR) - register_proc_table(table->child, de); + register_proc_table(table->child, de, set); } } @@ -1336,6 
+1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root continue; } + /* + * In any case, mark the entry as goner; we'll keep it + * around if it's busy, but we'll know to do nothing with + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; + /* Don't unregister proc entries that are still being used.. */ if (atomic_read(&de->count)) continue; @@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, size_t count, loff_t *ppos) { int op; - struct proc_dir_entry *de; + struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); struct ctl_table *table; size_t res; - ssize_t error; - - de = PDE(file->f_dentry->d_inode); - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - return -EPERM; + ssize_t error = -ENOTDIR; - res = count; - - error = (*table->proc_handler) (table, write, file, buf, &res, ppos); - if (error) - return error; - return res; + spin_lock(&sysctl_lock); + if (de && de->data && use_table(de->set)) { + /* + * at that point we know that sysctl was not unregistered + * and won't be until we finish + */ + spin_unlock(&sysctl_lock); + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + goto out; + error = -EPERM; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = (*table->proc_handler)(table, write, file, + buf, &res, ppos); + if (!error) + error = res; + out: + spin_lock(&sysctl_lock); + unuse_table(de->set); + } + spin_unlock(&sysctl_lock); + return error; } static int proc_opensys(struct inode *inode, struct file *file) @@ -1997,6 +2075,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: pointer to the file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231bc..42df83d7fad2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, - smp_processor_id()); + any_online_cpu(cpu_online_map)); cleanup_workqueue_thread(wq, hotcpu); } break; |
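The kernel/sysctl.c hunks above guard each ctl_table_header with a use count plus an "unregistering" completion, so unregister_sysctl_table() can block new users and wait out in-flight ones before freeing the table. The following is a minimal user-space C sketch of that lifetime idiom only — hypothetical names, with a pthread mutex and condition variable standing in for the kernel's sysctl_lock spinlock and struct completion; it is an illustration of the pattern, not the kernel code.

/*
 * Sketch of the use-count + "unregistering" pattern from kernel/sysctl.c.
 * All names are illustrative; pthreads replace spinlock/completion.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct table_header {
	int used;                 /* in-flight users of the table        */
	bool unregistering;       /* set once teardown has started       */
	pthread_mutex_t lock;     /* plays the role of sysctl_lock       */
	pthread_cond_t done;      /* plays the role of the completion    */
};

/* Take a reference; fails once teardown has begun (cf. use_table()). */
static bool use_table(struct table_header *p)
{
	bool ok;

	pthread_mutex_lock(&p->lock);
	ok = !p->unregistering;
	if (ok)
		p->used++;
	pthread_mutex_unlock(&p->lock);
	return ok;
}

/* Drop the reference; wake the unregistering thread if we were the last. */
static void unuse_table(struct table_header *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->used == 0 && p->unregistering)
		pthread_cond_signal(&p->done);
	pthread_mutex_unlock(&p->lock);
}

/* Refuse new users, then wait for existing ones (cf. start_unregistering()). */
static void start_unregistering(struct table_header *p)
{
	pthread_mutex_lock(&p->lock);
	p->unregistering = true;
	while (p->used != 0)
		pthread_cond_wait(&p->done, &p->lock);
	pthread_mutex_unlock(&p->lock);
	/* no path to the table remains; safe to free it at this point */
}

int main(void)
{
	struct table_header h = {
		.used = 0,
		.unregistering = false,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};

	if (use_table(&h)) {
		printf("handler runs against a live table\n");
		unuse_table(&h);
	}
	start_unregistering(&h);
	printf("table quiesced; the kernel version would kfree() here\n");
	return 0;
}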