Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 92
-rw-r--r-- | kernel/exit.c | 2
-rw-r--r-- | kernel/fork.c | 9
-rw-r--r-- | kernel/futex.c | 5
-rw-r--r-- | kernel/irq/manage.c | 1
-rw-r--r-- | kernel/kprobes.c | 134
-rw-r--r-- | kernel/module.c | 1
-rw-r--r-- | kernel/posix-cpu-timers.c | 6
-rw-r--r-- | kernel/power/snapshot.c | 19
-rw-r--r-- | kernel/power/swsusp.c | 45
-rw-r--r-- | kernel/printk.c | 1
-rw-r--r-- | kernel/ptrace.c | 82
-rw-r--r-- | kernel/sched.c | 5
-rw-r--r-- | kernel/softirq.c | 3
-rw-r--r-- | kernel/softlockup.c | 3
-rw-r--r-- | kernel/sys.c | 26
-rw-r--r-- | kernel/sysctl.c | 141
-rw-r--r-- | kernel/workqueue.c | 2
18 files changed, 385 insertions, 192 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2e3f4a47e7d0..6312d6bd43e3 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -54,6 +54,7 @@ #include <linux/jiffies.h> #include <linux/times.h> #include <linux/syscalls.h> +#include <linux/mount.h> #include <asm/uaccess.h> #include <asm/div64.h> #include <linux/blkdev.h> /* sector_div */ @@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file) add_timer(&acct_globals.timer); } if (old_acct) { + mnt_unpin(old_acct->f_vfsmnt); spin_unlock(&acct_globals.lock); do_acct_process(0, old_acct); filp_close(old_acct, NULL); @@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file) } } +static int acct_on(char *name) +{ + struct file *file; + int error; + + /* Difference from BSD - they don't do O_APPEND */ + file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + filp_close(file, NULL); + return -EACCES; + } + + if (!file->f_op->write) { + filp_close(file, NULL); + return -EIO; + } + + error = security_acct(file); + if (error) { + filp_close(file, NULL); + return error; + } + + spin_lock(&acct_globals.lock); + mnt_pin(file->f_vfsmnt); + acct_file_reopen(file); + spin_unlock(&acct_globals.lock); + + mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ + + return 0; +} + /** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting @@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file) */ asmlinkage long sys_acct(const char __user *name) { - struct file *file = NULL; - char *tmp; int error; if (!capable(CAP_SYS_PACCT)) return -EPERM; if (name) { - tmp = getname(name); - if (IS_ERR(tmp)) { + char *tmp = getname(name); + if (IS_ERR(tmp)) return (PTR_ERR(tmp)); - } - /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + error = acct_on(tmp); putname(tmp); - if (IS_ERR(file)) { - return (PTR_ERR(file)); - } - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - filp_close(file, NULL); - return (-EACCES); - } - - if (!file->f_op->write) { - filp_close(file, NULL); - return (-EIO); + } else { + error = security_acct(NULL); + if (!error) { + spin_lock(&acct_globals.lock); + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); } } + return error; +} - error = security_acct(file); - if (error) { - if (file) - filp_close(file, NULL); - return error; - } - +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @m: vfsmount being shut down + * + * If the accounting is turned on for a file in the subtree pointed to + * to by m, turn accounting off. Done when m is about to die. 
+ */ +void acct_auto_close_mnt(struct vfsmount *m) +{ spin_lock(&acct_globals.lock); - acct_file_reopen(file); + if (acct_globals.file && acct_globals.file->f_vfsmnt == m) + acct_file_reopen(NULL); spin_unlock(&acct_globals.lock); - - return (0); } /** @@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb) { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_dentry->d_inode->i_sb == sb) { - acct_file_reopen((struct file *)NULL); + acct_globals.file->f_vfsmnt->mnt_sb == sb) { + acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); } diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e8d..452a1d116178 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -28,6 +28,7 @@ #include <linux/cpuset.h> #include <linux/syscalls.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; + proc_exit_connector(tsk); exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612eac3..158710d22566 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include <linux/profile.h> #include <linux/rmap.h> #include <linux/acct.h> +#include <linux/cn_proc.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -469,13 +470,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; - /* - * There are cases where the PTL is held to ensure no - * new threads start up in user mode using an mm, which - * allows optimizing out ipis; the tlb_gather_mmu code - * is an example. - */ - spin_unlock_wait(&oldmm->page_table_lock); goto good_mm; } @@ -1143,6 +1137,7 @@ static task_t *copy_process(unsigned long clone_flags, __get_cpu_var(process_counts)++; } + proc_fork_connector(p); if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44cc6..aca8d10704f6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -365,6 +365,11 @@ retry: if (bh1 != bh2) spin_unlock(&bh2->lock); + if (unlikely(op_ret != -EFAULT)) { + ret = op_ret; + goto out; + } + /* futex_atomic_op_inuser needs to both read and write * *(int __user *)uaddr2, but we can't modify it * non-atomically. Therefore, if get_user below is not diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf20..3bd7226d15fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * * This function waits for any pending IRQ handlers for this interrupt * to complete before returning. If you use this function while diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd683a..5beda378cc75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -32,7 +32,6 @@ * <prasanna@in.ibm.com> added function-return probes. 
*/ #include <linux/kprobes.h> -#include <linux/spinlock.h> #include <linux/hash.h> #include <linux/init.h> #include <linux/slab.h> @@ -49,9 +48,9 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -unsigned int kprobe_cpu = NR_CPUS; -static DEFINE_SPINLOCK(kprobe_lock); -static struct kprobe *curr_kprobe; +static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; /* * kprobe->ainsn.insn points to the copy of the instruction to be @@ -153,50 +152,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) } } -/* Locks kprobe: irqs must be disabled */ -void __kprobes lock_kprobes(void) +/* We have preemption disabled.. so it is safe to use __ versions */ +static inline void set_kprobe_instance(struct kprobe *kp) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we take the kprobe_lock - * and before we get a chance to update kprobe_cpu, this to prevent - * deadlock when we have a kprobe on ISR routine and a kprobe on task - * routine - */ - local_irq_save(flags); - - spin_lock(&kprobe_lock); - kprobe_cpu = smp_processor_id(); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = kp; } -void __kprobes unlock_kprobes(void) +static inline void reset_kprobe_instance(void) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we update - * kprobe_cpu and before we get a a chance to release kprobe_lock, - * this to prevent deadlock when we have a kprobe on ISR routine and - * a kprobe on task routine - */ - local_irq_save(flags); - - kprobe_cpu = NR_CPUS; - spin_unlock(&kprobe_lock); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = NULL; } -/* You have to be holding the kprobe_lock */ +/* + * This routine is called either: + * - under the kprobe_lock spinlock - during kprobe_[un]register() + * OR + * - with preemption disabled - from arch/xxx/kernel/kprobes.c + */ struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; struct hlist_node *node; + struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each(node, head) { - struct kprobe *p = hlist_entry(node, struct kprobe, hlist); + hlist_for_each_entry_rcu(p, node, head, hlist) { if (p->addr == addr) return p; } @@ -211,13 +191,13 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->pre_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; } - curr_kprobe = NULL; + reset_kprobe_instance(); } return 0; } @@ -227,11 +207,11 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->post_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); - curr_kprobe = NULL; + reset_kprobe_instance(); } } return; @@ -240,12 +220,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) { + struct kprobe *cur = __get_cpu_var(kprobe_instance); + /* * if we faulted "during" the execution of a user specified * probe handler, 
invoke just that probe's fault handler */ - if (curr_kprobe && curr_kprobe->fault_handler) { - if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) + if (cur && cur->fault_handler) { + if (cur->fault_handler(cur, regs, trapnr)) return 1; } return 0; @@ -253,17 +235,18 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { - struct kprobe *kp = curr_kprobe; - if (curr_kprobe && kp->break_handler) { - if (kp->break_handler(kp, regs)) { - curr_kprobe = NULL; - return 1; - } + struct kprobe *cur = __get_cpu_var(kprobe_instance); + int ret = 0; + + if (cur && cur->break_handler) { + if (cur->break_handler(cur, regs)) + ret = 1; } - curr_kprobe = NULL; - return 0; + reset_kprobe_instance(); + return ret; } +/* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; @@ -273,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) return NULL; } +/* Called with kretprobe_lock held */ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe *rp) { @@ -283,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe return NULL; } +/* Called with kretprobe_lock held */ void __kprobes add_rp_inst(struct kretprobe_instance *ri) { /* @@ -301,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) hlist_add_head(&ri->uflist, &ri->rp->used_instances); } +/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) { /* remove rp inst off the rprobe_inst_table */ @@ -334,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct hlist_node *node, *tmp; unsigned long flags = 0; - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } /* @@ -351,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); + unsigned long flags = 0; /*TODO: consider to only swap the RA after the last pre_handler fired */ + spin_lock_irqsave(&kretprobe_lock, flags); arch_prepare_kretprobe(rp, regs); + spin_unlock_irqrestore(&kretprobe_lock, flags); return 0; } @@ -384,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) struct kprobe *kp; if (p->break_handler) { - list_for_each_entry(kp, &old_p->list, list) { + list_for_each_entry_rcu(kp, &old_p->list, list) { if (kp->break_handler) return -EEXIST; } - list_add_tail(&p->list, &old_p->list); + list_add_tail_rcu(&p->list, &old_p->list); } else - list_add(&p->list, &old_p->list); + list_add_rcu(&p->list, &old_p->list); return 0; } @@ -408,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); - list_add(&p->list, &ap->list); + list_add_rcu(&p->list, &ap->list); INIT_HLIST_NODE(&ap->hlist); - hlist_del(&p->hlist); - hlist_add_head(&ap->hlist, + hlist_del_rcu(&p->hlist); + hlist_add_head_rcu(&ap->hlist, &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); } /* * This is the second or subsequent kprobe at the address - handle * the 
intricacies - * TODO: Move kcalloc outside the spinlock + * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -445,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) { arch_disarm_kprobe(p); - hlist_del(&p->hlist); + hlist_del_rcu(&p->hlist); spin_unlock_irqrestore(&kprobe_lock, flags); arch_remove_kprobe(p); } @@ -453,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) static inline void cleanup_aggr_kprobe(struct kprobe *old_p, struct kprobe *p, unsigned long flags) { - list_del(&p->list); - if (list_empty(&old_p->list)) { + list_del_rcu(&p->list); + if (list_empty(&old_p->list)) cleanup_kprobe(old_p, flags); - kfree(old_p); - } else + else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -480,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; + p->nmissed = 0; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); - p->nmissed = 0; if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; @@ -490,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) arch_copy_kprobe(p); INIT_HLIST_NODE(&p->hlist); - hlist_add_head(&p->hlist, + hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); arch_arm_kprobe(p); @@ -511,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); if (old_p) { + /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ if (old_p->pre_handler == aggr_pre_handler) cleanup_aggr_kprobe(old_p, p, flags); else cleanup_kprobe(p, flags); + + synchronize_sched(); + if (old_p->pre_handler == aggr_pre_handler && + list_empty(&old_p->list)) + kfree(old_p); } else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -591,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) unregister_kprobe(&rp->kp); /* No race here */ - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); free_rp_inst(rp); while ((ri = get_used_rp_inst(rp)) != NULL) { ri->rp = NULL; hlist_del(&ri->uflist); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } static int __init init_kprobes(void) diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab625..2ea929d51ad0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -37,6 +37,7 @@ #include <linux/stop_machine.h> #include <linux/device.h> #include <linux/string.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include <asm/semaphore.h> #include <asm/cacheflush.h> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a894264941..84af54c39e1b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(prof_ticks(t), left); if (cputime_eq(t->it_prof_expires, cputime_zero) || @@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(virt_ticks(t), left); if (cputime_eq(t->it_virt_expires, 
cputime_zero) || @@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, nsleft = expires.sched - val.sched; do_div(nsleft, nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ns = t->sched_time + nsleft; if (t->it_sched_expires == 0 || t->it_sched_expires > ns) { diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a628704398..723f5179883e 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -168,9 +168,8 @@ static unsigned count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; - unsigned n; + unsigned int n = 0; - n = 0; for_each_zone (zone) { if (is_highmem(zone)) continue; @@ -250,10 +249,10 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned nr_pages) +void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; - unsigned num = PBES_PER_PAGE; + unsigned int num = PBES_PER_PAGE; for_each_pb_page (pbpage, pblist) { if (num >= nr_pages) @@ -293,9 +292,9 @@ static void *alloc_image_page(void) * On each page we set up a list of struct_pbe elements. */ -struct pbe *alloc_pagedir(unsigned nr_pages) +struct pbe *alloc_pagedir(unsigned int nr_pages) { - unsigned num; + unsigned int num; struct pbe *pblist, *pbe; if (!nr_pages) @@ -329,7 +328,7 @@ void swsusp_free(void) for_each_zone(zone) { for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page * page; + struct page *page; page = pfn_to_page(zone_pfn + zone->zone_start_pfn); if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); @@ -348,7 +347,7 @@ void swsusp_free(void) * free pages. */ -static int enough_free_mem(unsigned nr_pages) +static int enough_free_mem(unsigned int nr_pages) { pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); return nr_free_pages() > (nr_pages + PAGES_FOR_IO + @@ -356,7 +355,7 @@ static int enough_free_mem(unsigned nr_pages) } -static struct pbe *swsusp_alloc(unsigned nr_pages) +static struct pbe *swsusp_alloc(unsigned int nr_pages) { struct pbe *pblist, *p; @@ -380,7 +379,7 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) asmlinkage int swsusp_save(void) { - unsigned nr_pages; + unsigned int nr_pages; pr_debug("swsusp: critical section: \n"); if (save_highmem()) { diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad61f..e1ab28b9b217 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -85,18 +85,11 @@ unsigned int nr_copy_pages __nosavedata = 0; /* Suspend pagedir is allocated before final copy, therefore it must be freed after resume - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - Warning: this is even more evil than it seems. Pagedirs this file talks about are completely different from page directories used by MMU hardware. 
*/ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -suspend_pagedir_t *pagedir_save; #define SWSUSP_SIG "S1SUSPEND" @@ -122,8 +115,8 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -static int write_page(unsigned long addr, swp_entry_t * loc); -static int bio_read_page(pgoff_t page_off, void * page); +static int write_page(unsigned long addr, swp_entry_t *loc); +static int bio_read_page(pgoff_t page_off, void *page); static u8 key_iv[MAXKEY+MAXIV]; @@ -355,7 +348,7 @@ static void lock_swapdevices(void) * This is a partial improvement, since we will at least return other * errors, though we need to eventually fix the damn code. */ -static int write_page(unsigned long addr, swp_entry_t * loc) +static int write_page(unsigned long addr, swp_entry_t *loc) { swp_entry_t entry; int error = 0; @@ -383,9 +376,9 @@ static int write_page(unsigned long addr, swp_entry_t * loc) static void data_free(void) { swp_entry_t entry; - struct pbe * p; + struct pbe *p; - for_each_pbe(p, pagedir_nosave) { + for_each_pbe (p, pagedir_nosave) { entry = p->swap_address; if (entry.val) swap_free(entry); @@ -492,8 +485,8 @@ static void free_pagedir_entries(void) static int write_pagedir(void) { int error = 0; - unsigned n = 0; - struct pbe * pbe; + unsigned int n = 0; + struct pbe *pbe; printk( "Writing pagedir..."); for_each_pb_page (pbe, pagedir_nosave) { @@ -543,7 +536,7 @@ static int write_suspend_image(void) * We should only consider resume_device. */ -int enough_swap(unsigned nr_pages) +int enough_swap(unsigned int nr_pages) { struct sysinfo i; @@ -694,7 +687,7 @@ static int check_pagedir(struct pbe *pblist) * restore from the loaded pages later. We relocate them here. */ -static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) +static struct pbe *swsusp_pagedir_relocate(struct pbe *pblist) { struct zone *zone; unsigned long zone_pfn; @@ -770,7 +763,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) static atomic_t io_done = ATOMIC_INIT(0); -static int end_io(struct bio * bio, unsigned int num, int err) +static int end_io(struct bio *bio, unsigned int num, int err) { if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) panic("I/O error reading memory image"); @@ -778,7 +771,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) return 0; } -static struct block_device * resume_bdev; +static struct block_device *resume_bdev; /** * submit - submit BIO request. @@ -791,10 +784,10 @@ static struct block_device * resume_bdev; * Then submit it and wait. */ -static int submit(int rw, pgoff_t page_off, void * page) +static int submit(int rw, pgoff_t page_off, void *page) { int error = 0; - struct bio * bio; + struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); if (!bio) @@ -823,12 +816,12 @@ static int submit(int rw, pgoff_t page_off, void * page) return error; } -static int bio_read_page(pgoff_t page_off, void * page) +static int bio_read_page(pgoff_t page_off, void *page) { return submit(READ, page_off, page); } -static int bio_write_page(pgoff_t page_off, void * page) +static int bio_write_page(pgoff_t page_off, void *page) { return submit(WRITE, page_off, page); } @@ -838,7 +831,7 @@ static int bio_write_page(pgoff_t page_off, void * page) * I really don't think that it's foolproof but more than nothing.. 
*/ -static const char * sanity_check(void) +static const char *sanity_check(void) { dump_info(); if (swsusp_info.version_code != LINUX_VERSION_CODE) @@ -864,7 +857,7 @@ static const char * sanity_check(void) static int check_header(void) { - const char * reason = NULL; + const char *reason = NULL; int error; if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) @@ -912,7 +905,7 @@ static int check_sig(void) static int data_read(struct pbe *pblist) { - struct pbe * p; + struct pbe *p; int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; @@ -950,7 +943,7 @@ static int data_read(struct pbe *pblist) static int read_pagedir(struct pbe *pblist) { struct pbe *pbpage, *p; - unsigned i = 0; + unsigned int i = 0; int error; if (!pblist) diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209bc..e9be027bc930 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -806,7 +806,6 @@ void console_unblank(void) c->unblank(); release_console_sem(); } -EXPORT_SYMBOL(console_unblank); /* * Return the console tty driver structure and its associated index diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff47..5b8dd98a230e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, return ret; } + +#ifndef __ARCH_SYS_PTRACE +static int ptrace_get_task_struct(long request, long pid, + struct task_struct **childp) +{ + struct task_struct *child; + int ret; + + /* + * Callers use child == NULL as an indication to exit early even + * when the return value is 0, so make sure it is non-NULL here. + */ + *childp = NULL; + + if (request == PTRACE_TRACEME) { + /* + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. + */ + current->ptrace |= PT_PTRACED; + return 0; + } + + /* + * You may not mess with init + */ + if (pid == 1) + return -EPERM; + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + return -ESRCH; + + *childp = child; + return 0; +} + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + ret = ptrace_get_task_struct(request, pid, &child); + if (!child) + goto out; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_put_task_struct; + + ret = arch_ptrace(child, request, addr, data); + if (ret < 0) + goto out_put_task_struct; + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_SYS_PTRACE */ diff --git a/kernel/sched.c b/kernel/sched.c index b4f4eb613537..3ce26954be12 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3563,8 +3563,6 @@ int idle_cpu(int cpu) return cpu_curr(cpu) == cpu_rq(cpu)->idle; } -EXPORT_SYMBOL_GPL(idle_cpu); - /** * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. @@ -4680,7 +4678,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind it from offline cpu so it can run. Fall thru. 
*/ - kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); + kthread_bind(cpu_rq(cpu)->migration_thread, + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48be..ad3295cdded5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(ksoftirqd, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209cea7..a2dcceb9437d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -123,7 +123,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(watchdog_task, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed18123c..c43b3e22bbda 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include <linux/suspend.h> #include <linux/tty.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -375,18 +376,21 @@ void emergency_restart(void) } EXPORT_SYMBOL_GPL(emergency_restart); -/** - * kernel_restart - reboot the system - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); } + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. 
+ */ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); @@ -623,6 +627,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->egid = new_egid; current->gid = new_rgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -662,6 +667,7 @@ asmlinkage long sys_setgid(gid_t gid) return -EPERM; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -751,6 +757,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->fsuid = current->euid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -798,6 +805,7 @@ asmlinkage long sys_setuid(uid_t uid) current->suid = new_suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -846,6 +854,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) current->suid = suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -898,6 +907,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->sgid = sgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -940,6 +950,7 @@ asmlinkage long sys_setfsuid(uid_t uid) } key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); @@ -968,6 +979,7 @@ asmlinkage long sys_setfsgid(gid_t gid) } current->fsgid = gid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); } return old_fsgid; } @@ -1485,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -EXPORT_SYMBOL(uts_sem); - asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e2495542..9990e10192e8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { extern struct proc_dir_entry *proc_sys_root; -static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif @@ -952,7 +952,7 @@ static ctl_table fs_table[] = { .data = &aio_nr, .maxlen = sizeof(aio_nr), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .ctl_name = FS_AIO_MAX_NR, @@ -960,7 +960,7 @@ static ctl_table fs_table[] = { .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, #ifdef CONFIG_INOTIFY { @@ -992,10 +992,51 @@ static ctl_table dev_table[] = { extern void init_irq_proc (void); +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll 
eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + void __init sysctl_init(void) { #ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root); + register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } @@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol void __user *newval, size_t newlen) { struct list_head *tmp; + int error = -ENOTDIR; if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; @@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } + spin_lock(&sysctl_lock); tmp = &root_table_header.ctl_entry; do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, + + if (!use_table(head)) + continue; + + spin_unlock(&sysctl_lock); + + error = parse_table(name, nlen, oldval, oldlenp, newval, newlen, head->ctl_table, &context); kfree(context); + + spin_lock(&sysctl_lock); + unuse_table(head); if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; + break; + } while ((tmp = tmp->next) != &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); + return error; } asmlinkage long sys_sysctl(struct __sysctl_args __user *args) @@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); #ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + register_proc_table(table, proc_sys_root, tmp); #endif return tmp; } @@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, */ void unregister_sysctl_table(struct ctl_table_header * header) { - list_del(&header->ctl_entry); + might_sleep(); + spin_lock(&sysctl_lock); + start_unregistering(header); #ifdef CONFIG_PROC_FS unregister_proc_table(header->ctl_table, proc_sys_root); #endif + spin_unlock(&sysctl_lock); kfree(header); } @@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) #ifdef CONFIG_PROC_FS /* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) { struct proc_dir_entry *de; int len; @@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) de = create_proc_entry(table->procname, mode, root); if (!de) continue; + de->set = set; de->data = (void *) table; if (table->proc_handler) de->proc_fops = &proc_sys_file_operations; } table->de = de; if (de->mode & S_IFDIR) - register_proc_table(table->child, de); + register_proc_table(table->child, de, set); } } @@ -1336,6 
+1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root continue; } + /* + * In any case, mark the entry as goner; we'll keep it + * around if it's busy, but we'll know to do nothing with + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; + /* Don't unregister proc entries that are still being used.. */ if (atomic_read(&de->count)) continue; @@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, size_t count, loff_t *ppos) { int op; - struct proc_dir_entry *de; + struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); struct ctl_table *table; size_t res; - ssize_t error; - - de = PDE(file->f_dentry->d_inode); - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - return -EPERM; + ssize_t error = -ENOTDIR; - res = count; - - error = (*table->proc_handler) (table, write, file, buf, &res, ppos); - if (error) - return error; - return res; + spin_lock(&sysctl_lock); + if (de && de->data && use_table(de->set)) { + /* + * at that point we know that sysctl was not unregistered + * and won't be until we finish + */ + spin_unlock(&sysctl_lock); + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + goto out; + error = -EPERM; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = (*table->proc_handler)(table, write, file, + buf, &res, ppos); + if (!error) + error = res; + out: + spin_lock(&sysctl_lock); + unuse_table(de->set); + } + spin_unlock(&sysctl_lock); + return error; } static int proc_opensys(struct inode *inode, struct file *file) @@ -1997,6 +2075,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: pointer to the file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231bc..42df83d7fad2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, - smp_processor_id()); + any_online_cpu(cpu_online_map)); cleanup_workqueue_thread(wq, hotcpu); } break; |
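The kernel/sysctl.c hunks above guard each ctl_table_header with a use count plus an "unregistering" completion, so unregister_sysctl_table() can block new users and wait out in-flight ones before freeing the table. The following is a minimal user-space C sketch of that lifetime idiom only — hypothetical names, with a pthread mutex and condition variable standing in for the kernel's sysctl_lock spinlock and struct completion; it is an illustration of the pattern, not the kernel code.

/*
 * Sketch of the use-count + "unregistering" pattern from kernel/sysctl.c.
 * All names are illustrative; pthreads replace spinlock/completion.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct table_header {
	int used;                 /* in-flight users of the table        */
	bool unregistering;       /* set once teardown has started       */
	pthread_mutex_t lock;     /* plays the role of sysctl_lock       */
	pthread_cond_t done;      /* plays the role of the completion    */
};

/* Take a reference; fails once teardown has begun (cf. use_table()). */
static bool use_table(struct table_header *p)
{
	bool ok;

	pthread_mutex_lock(&p->lock);
	ok = !p->unregistering;
	if (ok)
		p->used++;
	pthread_mutex_unlock(&p->lock);
	return ok;
}

/* Drop the reference; wake the unregistering thread if we were the last. */
static void unuse_table(struct table_header *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->used == 0 && p->unregistering)
		pthread_cond_signal(&p->done);
	pthread_mutex_unlock(&p->lock);
}

/* Refuse new users, then wait for existing ones (cf. start_unregistering()). */
static void start_unregistering(struct table_header *p)
{
	pthread_mutex_lock(&p->lock);
	p->unregistering = true;
	while (p->used != 0)
		pthread_cond_wait(&p->done, &p->lock);
	pthread_mutex_unlock(&p->lock);
	/* no path to the table remains; safe to free it at this point */
}

int main(void)
{
	struct table_header h = {
		.used = 0,
		.unregistering = false,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};

	if (use_table(&h)) {
		printf("handler runs against a live table\n");
		unuse_table(&h);
	}
	start_unregistering(&h);
	printf("table quiesced; the kernel version would kfree() here\n");
	return 0;
}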