diff options
author | John W. Linville <linville@tuxdriver.com> | 2012-09-07 23:07:55 +0400 |
---|---|---|
committer | John W. Linville <linville@tuxdriver.com> | 2012-09-07 23:07:55 +0400 |
commit | fac805f8c198092de9a2842efd7f5022e2937b18 (patch) | |
tree | 7557809c373f97a343c427d8fded0696060394ce /kernel | |
parent | 2461c7d60f9f3821274e4acf9019cba8b82c94b5 (diff) | |
parent | f10723841e624c0726c70356b31d91befed01dd6 (diff) | |
download | linux-fac805f8c198092de9a2842efd7f5022e2937b18.tar.xz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 21 | ||||
-rw-r--r-- | kernel/cpu.c | 2 | ||||
-rw-r--r-- | kernel/events/uprobes.c | 213 | ||||
-rw-r--r-- | kernel/fork.c | 46 | ||||
-rw-r--r-- | kernel/irq/handle.c | 7 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 362 | ||||
-rw-r--r-- | kernel/irq/manage.c | 17 | ||||
-rw-r--r-- | kernel/kexec.c | 2 | ||||
-rw-r--r-- | kernel/kmod.c | 37 | ||||
-rw-r--r-- | kernel/panic.c | 8 | ||||
-rw-r--r-- | kernel/power/suspend.c | 3 | ||||
-rw-r--r-- | kernel/printk.c | 32 | ||||
-rw-r--r-- | kernel/resource.c | 24 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/softirq.c | 9 | ||||
-rw-r--r-- | kernel/sys.c | 57 | ||||
-rw-r--r-- | kernel/sysctl.c | 69 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 2 | ||||
-rw-r--r-- | kernel/taskstats.c | 5 | ||||
-rw-r--r-- | kernel/watchdog.c | 21 |
20 files changed, 563 insertions, 376 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 4a3f28d2ca65..ea3b7b6191c7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1456,6 +1456,27 @@ void audit_log_key(struct audit_buffer *ab, char *key) } /** + * audit_log_link_denied - report a link restriction denial + * @operation: specific link opreation + * @link: the path that triggered the restriction + */ +void audit_log_link_denied(const char *operation, struct path *link) +{ + struct audit_buffer *ab; + + ab = audit_log_start(current->audit_context, GFP_KERNEL, + AUDIT_ANOM_LINK); + audit_log_format(ab, "op=%s action=denied", operation); + audit_log_format(ab, " pid=%d comm=", current->pid); + audit_log_untrustedstring(ab, current->comm); + audit_log_d_path(ab, " path=", link); + audit_log_format(ab, " dev="); + audit_log_untrustedstring(ab, link->dentry->d_inode->i_sb->s_id); + audit_log_format(ab, " ino=%lu", link->dentry->d_inode->i_ino); + audit_log_end(ab); +} + +/** * audit_log_end - end one audit record * @ab: the audit_buffer * diff --git a/kernel/cpu.c b/kernel/cpu.c index a4eb5227a19e..14d32588cccd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -416,7 +416,7 @@ int __cpuinit cpu_up(unsigned int cpu) if (pgdat->node_zonelists->_zonerefs->zone == NULL) { mutex_lock(&zonelists_mutex); - build_all_zonelists(NULL); + build_all_zonelists(NULL, NULL); mutex_unlock(&zonelists_mutex); } #endif diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f93532748bca..c08a22d02f72 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -32,6 +32,7 @@ #include <linux/swap.h> /* try_to_free_swap */ #include <linux/ptrace.h> /* user_enable_single_step */ #include <linux/kdebug.h> /* notifier mechanism */ +#include "../../mm/internal.h" /* munlock_vma_page */ #include <linux/uprobes.h> @@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) return false; } -static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) +static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { - loff_t vaddr; - - vaddr = vma->vm_start + offset; - vaddr -= vma->vm_pgoff << PAGE_SHIFT; + return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); +} - return vaddr; +static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) +{ + return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); } /** @@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page + * @addr: address the old @page is mapped at * @page: the cowed page we are replacing by kpage * @kpage: the modified page we replace page by * * Returns 0 on success, -EFAULT on failure. */ -static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) +static int __replace_page(struct vm_area_struct *vma, unsigned long addr, + struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr; spinlock_t *ptl; pte_t *ptep; + int err; - addr = page_address_in_vma(page, vma); - if (addr == -EFAULT) - return -EFAULT; + /* For try_to_free_swap() and munlock_vma_page() below */ + lock_page(page); + err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - return -EAGAIN; + goto unlock; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page_remove_rmap(page); if (!page_mapped(page)) try_to_free_swap(page); - put_page(page); pte_unmap_unlock(ptep, ptl); - return 0; + if (vma->vm_flags & VM_LOCKED) + munlock_vma_page(page); + put_page(page); + + err = 0; + unlock: + unlock_page(page); + return err; } /** @@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; - struct address_space *mapping; void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; - struct uprobe *uprobe; int ret; + retry: /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) return ret; - ret = -EINVAL; - - /* - * We are interested in text pages only. Our pages of interest - * should be mapped for read and execute only. We desist from - * adding probes in write mapped pages since the breakpoints - * might end up in the file copy. - */ - if (!valid_vma(vma, is_swbp_insn(&opcode))) - goto put_out; - - uprobe = container_of(auprobe, struct uprobe, arch); - mapping = uprobe->inode->i_mapping; - if (mapping != vma->vm_file->f_mapping) - goto put_out; - ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) - goto put_out; + goto put_old; __SetPageUptodate(new_page); - /* - * lock page will serialize against do_wp_page()'s - * PageAnon() handling - */ - lock_page(old_page); /* copy the page now that we've got it stable */ vaddr_old = kmap_atomic(old_page); vaddr_new = kmap_atomic(new_page); @@ -257,17 +244,13 @@ retry: ret = anon_vma_prepare(vma); if (ret) - goto unlock_out; + goto put_new; - lock_page(new_page); - ret = __replace_page(vma, old_page, new_page); - unlock_page(new_page); + ret = __replace_page(vma, vaddr, old_page, new_page); -unlock_out: - unlock_page(old_page); +put_new: page_cache_release(new_page); - -put_out: +put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) @@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) curr = info; info->mm = vma->vm_mm; - info->vaddr = vma_address(vma, offset); + info->vaddr = offset_to_vaddr(vma, offset); } mutex_unlock(&mapping->i_mmap_mutex); @@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) goto free; down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)info->vaddr); - if (!vma || !valid_vma(vma, is_register)) + vma = find_vma(mm, info->vaddr); + if (!vma || !valid_vma(vma, is_register) || + vma->vm_file->f_mapping->host != uprobe->inode) goto unlock; - if (vma->vm_file->f_mapping->host != uprobe->inode || - vma_address(vma, uprobe->offset) != info->vaddr) + if (vma->vm_start > info->vaddr || + vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { @@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume put_uprobe(uprobe); } -/* - * Of all the nodes that correspond to the given inode, return the node - * with the least offset. - */ -static struct rb_node *find_least_offset_node(struct inode *inode) +static struct rb_node * +find_node_in_range(struct inode *inode, loff_t min, loff_t max) { - struct uprobe u = { .inode = inode, .offset = 0}; struct rb_node *n = uprobes_tree.rb_node; - struct rb_node *close_node = NULL; - struct uprobe *uprobe; - int match; while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(&u, uprobe); - - if (uprobe->inode == inode) - close_node = n; - - if (!match) - return close_node; + struct uprobe *u = rb_entry(n, struct uprobe, rb_node); - if (match < 0) + if (inode < u->inode) { n = n->rb_left; - else + } else if (inode > u->inode) { n = n->rb_right; + } else { + if (max < u->offset) + n = n->rb_left; + else if (min > u->offset) + n = n->rb_right; + else + break; + } } - return close_node; + return n; } /* - * For a given inode, build a list of probes that need to be inserted. + * For a given range in vma, build a list of probes that need to be inserted. */ -static void build_probe_list(struct inode *inode, struct list_head *head) +static void build_probe_list(struct inode *inode, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *head) { - struct uprobe *uprobe; + loff_t min, max; unsigned long flags; - struct rb_node *n; - - spin_lock_irqsave(&uprobes_treelock, flags); - - n = find_least_offset_node(inode); + struct rb_node *n, *t; + struct uprobe *u; - for (; n; n = rb_next(n)) { - uprobe = rb_entry(n, struct uprobe, rb_node); - if (uprobe->inode != inode) - break; + INIT_LIST_HEAD(head); + min = vaddr_to_offset(vma, start); + max = min + (end - start) - 1; - list_add(&uprobe->pending_list, head); - atomic_inc(&uprobe->ref); + spin_lock_irqsave(&uprobes_treelock, flags); + n = find_node_in_range(inode, min, max); + if (n) { + for (t = n; t; t = rb_prev(t)) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset < min) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } + for (t = n; (t = rb_next(t)); ) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset > max) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } } - spin_unlock_irqrestore(&uprobes_treelock, flags); } @@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; int ret, count; @@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!inode) return 0; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); + build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); ret = 0; count = 0; - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { - put_uprobe(uprobe); - continue; - } + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); /* @@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; + if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ + return; + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) return; @@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!inode) return; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); - - list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr >= start && vaddr < end) { - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - } + build_probe_list(inode, vma, start, end, &tmp_list); + + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); @@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { - struct inode *inode; - loff_t offset; + struct inode *inode = vma->vm_file->f_mapping->host; + loff_t offset = vaddr_to_offset(vma, bp_vaddr); - inode = vma->vm_file->f_mapping->host; - offset = bp_vaddr - vma->vm_start; - offset += (vma->vm_pgoff << PAGE_SHIFT); uprobe = find_uprobe(inode, offset); } diff --git a/kernel/fork.c b/kernel/fork.c index ff1cad3b7bdc..3bd2280d79f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,6 +114,10 @@ int nr_processes(void) return total; } +void __weak arch_release_task_struct(struct task_struct *tsk) +{ +} + #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static struct kmem_cache *task_struct_cachep; @@ -122,17 +126,17 @@ static inline struct task_struct *alloc_task_struct_node(int node) return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); } -void __weak arch_release_task_struct(struct task_struct *tsk) { } - static inline void free_task_struct(struct task_struct *tsk) { - arch_release_task_struct(tsk); kmem_cache_free(task_struct_cachep, tsk); } #endif +void __weak arch_release_thread_info(struct thread_info *ti) +{ +} + #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR -void __weak arch_release_thread_info(struct thread_info *ti) { } /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a @@ -150,7 +154,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static inline void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); free_pages((unsigned long)ti, THREAD_SIZE_ORDER); } # else @@ -164,7 +167,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); kmem_cache_free(thread_info_cache, ti); } @@ -205,10 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); + arch_release_thread_info(tsk->stack); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); + arch_release_task_struct(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -298,23 +302,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; ti = alloc_thread_info_node(tsk, node); - if (!ti) { - free_task_struct(tsk); - return NULL; - } + if (!ti) + goto free_tsk; err = arch_dup_task_struct(tsk, orig); + if (err) + goto free_ti; - /* - * We defer looking at err, because we will need this setup - * for the clean up path to work correctly. - */ tsk->stack = ti; - setup_thread_stack(tsk, orig); - - if (err) - goto out; + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); @@ -338,8 +335,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return tsk; -out: +free_ti: free_thread_info(ti); +free_tsk: free_task_struct(tsk); return NULL; } @@ -383,16 +381,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct file *file; if (mpnt->vm_flags & VM_DONTCOPY) { - long pages = vma_pages(mpnt); - mm->total_vm -= pages; vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, - -pages); + -vma_pages(mpnt)); continue; } charge = 0; if (mpnt->vm_flags & VM_ACCOUNT) { - unsigned long len; - len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + unsigned long len = vma_pages(mpnt); + if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ goto fail_nomem; charge = len; @@ -1310,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index bdb180325551..131ca176b497 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,7 +133,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) { irqreturn_t retval = IRQ_NONE; - unsigned int random = 0, irq = desc->irq_data.irq; + unsigned int flags = 0, irq = desc->irq_data.irq; do { irqreturn_t res; @@ -161,7 +161,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) /* Fall through to add to randomness */ case IRQ_HANDLED: - random |= action->flags; + flags |= action->flags; break; default: @@ -172,8 +172,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) action = action->next; } while (action); - if (random & IRQF_SAMPLE_RANDOM) - add_interrupt_randomness(irq); + add_interrupt_randomness(irq, flags); if (!noirqdebug) note_interrupt(irq, desc, retval); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 38c5eb839c92..49a77727db42 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -10,6 +10,7 @@ #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/topology.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/smp.h> @@ -45,7 +46,8 @@ static struct irq_domain *irq_domain_alloc(struct device_node *of_node, { struct irq_domain *domain; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); + domain = kzalloc_node(sizeof(*domain), GFP_KERNEL, + of_node_to_nid(of_node)); if (WARN_ON(!domain)) return NULL; @@ -138,6 +140,36 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, } /** + * irq_domain_add_simple() - Allocate and register a simple irq_domain. + * @of_node: pointer to interrupt controller's device tree node. + * @size: total number of irqs in mapping + * @first_irq: first number of irq block assigned to the domain + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Allocates a legacy irq_domain if irq_base is positive or a linear + * domain otherwise. + * + * This is intended to implement the expected behaviour for most + * interrupt controllers which is that a linear mapping should + * normally be used unless the system requires a legacy mapping in + * order to support supplying interrupt numbers during non-DT + * registration of devices. + */ +struct irq_domain *irq_domain_add_simple(struct device_node *of_node, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data) +{ + if (first_irq > 0) + return irq_domain_add_legacy(of_node, size, first_irq, 0, + ops, host_data); + else + return irq_domain_add_linear(of_node, size, ops, host_data); +} + +/** * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. * @size: total number of irqs in legacy mapping @@ -203,7 +235,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, * one can then use irq_create_mapping() to * explicitly change them */ - ops->map(domain, irq, hwirq); + if (ops->map) + ops->map(domain, irq, hwirq); /* Clear norequest flags */ irq_clear_status_flags(irq, IRQ_NOREQUEST); @@ -215,7 +248,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, EXPORT_SYMBOL_GPL(irq_domain_add_legacy); /** - * irq_domain_add_linear() - Allocate and register a legacy revmap irq_domain. + * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. * @size: Number of interrupts in the domain. * @ops: map/unmap domain callbacks @@ -229,7 +262,8 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, struct irq_domain *domain; unsigned int *revmap; - revmap = kzalloc(sizeof(*revmap) * size, GFP_KERNEL); + revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL, + of_node_to_nid(of_node)); if (WARN_ON(!revmap)) return NULL; @@ -330,24 +364,112 @@ void irq_set_default_host(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_set_default_host); -static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) +static void irq_domain_disassociate_many(struct irq_domain *domain, + unsigned int irq_base, int count) { - struct irq_data *irq_data = irq_get_irq_data(virq); + /* + * disassociate in reverse order; + * not strictly necessary, but nice for unwinding + */ + while (count--) { + int irq = irq_base + count; + struct irq_data *irq_data = irq_get_irq_data(irq); + irq_hw_number_t hwirq = irq_data->hwirq; + + if (WARN_ON(!irq_data || irq_data->domain != domain)) + continue; + + irq_set_status_flags(irq, IRQ_NOREQUEST); + + /* remove chip and handler */ + irq_set_chip_and_handler(irq, NULL, NULL); + + /* Make sure it's completed */ + synchronize_irq(irq); + + /* Tell the PIC about it */ + if (domain->ops->unmap) + domain->ops->unmap(domain, irq); + smp_mb(); - irq_data->hwirq = hwirq; - irq_data->domain = domain; - if (domain->ops->map(domain, virq, hwirq)) { - pr_debug("irq-%i==>hwirq-0x%lx mapping failed\n", virq, hwirq); irq_data->domain = NULL; irq_data->hwirq = 0; - return -1; + + /* Clear reverse map */ + switch(domain->revmap_type) { + case IRQ_DOMAIN_MAP_LINEAR: + if (hwirq < domain->revmap_data.linear.size) + domain->revmap_data.linear.revmap[hwirq] = 0; + break; + case IRQ_DOMAIN_MAP_TREE: + mutex_lock(&revmap_trees_mutex); + radix_tree_delete(&domain->revmap_data.tree, hwirq); + mutex_unlock(&revmap_trees_mutex); + break; + } } +} + +int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, + irq_hw_number_t hwirq_base, int count) +{ + unsigned int virq = irq_base; + irq_hw_number_t hwirq = hwirq_base; + int i, ret; + + pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, + of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count); + + for (i = 0; i < count; i++) { + struct irq_data *irq_data = irq_get_irq_data(virq + i); + + if (WARN(!irq_data, "error: irq_desc not allocated; " + "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) + return -EINVAL; + if (WARN(irq_data->domain, "error: irq_desc already associated; " + "irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i)) + return -EINVAL; + }; + + for (i = 0; i < count; i++, virq++, hwirq++) { + struct irq_data *irq_data = irq_get_irq_data(virq); + + irq_data->hwirq = hwirq; + irq_data->domain = domain; + if (domain->ops->map) { + ret = domain->ops->map(domain, virq, hwirq); + if (ret != 0) { + pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n", + virq, hwirq, ret); + WARN_ON(1); + irq_data->domain = NULL; + irq_data->hwirq = 0; + goto err_unmap; + } + } - irq_clear_status_flags(virq, IRQ_NOREQUEST); + switch (domain->revmap_type) { + case IRQ_DOMAIN_MAP_LINEAR: + if (hwirq < domain->revmap_data.linear.size) + domain->revmap_data.linear.revmap[hwirq] = virq; + break; + case IRQ_DOMAIN_MAP_TREE: + mutex_lock(&revmap_trees_mutex); + radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); + mutex_unlock(&revmap_trees_mutex); + break; + } + + irq_clear_status_flags(virq, IRQ_NOREQUEST); + } return 0; + + err_unmap: + irq_domain_disassociate_many(domain, irq_base, i); + return -EINVAL; } +EXPORT_SYMBOL_GPL(irq_domain_associate_many); /** * irq_create_direct_mapping() - Allocate an irq for direct mapping @@ -364,10 +486,10 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) if (domain == NULL) domain = irq_default_domain; - BUG_ON(domain == NULL); - WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP); + if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP)) + return 0; - virq = irq_alloc_desc_from(1, 0); + virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); if (!virq) { pr_debug("create_direct virq allocation failed\n"); return 0; @@ -380,7 +502,7 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) } pr_debug("create_direct obtained virq %d\n", virq); - if (irq_setup_virq(domain, virq, virq)) { + if (irq_domain_associate(domain, virq, virq)) { irq_free_desc(virq); return 0; } @@ -433,17 +555,16 @@ unsigned int irq_create_mapping(struct irq_domain *domain, hint = hwirq % nr_irqs; if (hint == 0) hint++; - virq = irq_alloc_desc_from(hint, 0); + virq = irq_alloc_desc_from(hint, of_node_to_nid(domain->of_node)); if (virq <= 0) - virq = irq_alloc_desc_from(1, 0); + virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; } - if (irq_setup_virq(domain, virq, hwirq)) { - if (domain->revmap_type != IRQ_DOMAIN_MAP_LEGACY) - irq_free_desc(virq); + if (irq_domain_associate(domain, virq, hwirq)) { + irq_free_desc(virq); return 0; } @@ -454,6 +575,44 @@ unsigned int irq_create_mapping(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_create_mapping); +/** + * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs + * @domain: domain owning the interrupt range + * @irq_base: beginning of linux IRQ range + * @hwirq_base: beginning of hardware IRQ range + * @count: Number of interrupts to map + * + * This routine is used for allocating and mapping a range of hardware + * irqs to linux irqs where the linux irq numbers are at pre-defined + * locations. For use by controllers that already have static mappings + * to insert in to the domain. + * + * Non-linear users can use irq_create_identity_mapping() for IRQ-at-a-time + * domain insertion. + * + * 0 is returned upon success, while any failure to establish a static + * mapping is treated as an error. + */ +int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, + irq_hw_number_t hwirq_base, int count) +{ + int ret; + + ret = irq_alloc_descs(irq_base, irq_base, count, + of_node_to_nid(domain->of_node)); + if (unlikely(ret < 0)) + return ret; + + ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count); + if (unlikely(ret < 0)) { + irq_free_descs(irq_base, count); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(irq_create_strict_mappings); + unsigned int irq_create_of_mapping(struct device_node *controller, const u32 *intspec, unsigned int intsize) { @@ -511,7 +670,6 @@ void irq_dispose_mapping(unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_domain *domain; - irq_hw_number_t hwirq; if (!virq || !irq_data) return; @@ -524,33 +682,7 @@ void irq_dispose_mapping(unsigned int virq) if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) return; - irq_set_status_flags(virq, IRQ_NOREQUEST); - - /* remove chip and handler */ - irq_set_chip_and_handler(virq, NULL, NULL); - - /* Make sure it's completed */ - synchronize_irq(virq); - - /* Tell the PIC about it */ - if (domain->ops->unmap) - domain->ops->unmap(domain, virq); - smp_mb(); - - /* Clear reverse map */ - hwirq = irq_data->hwirq; - switch(domain->revmap_type) { - case IRQ_DOMAIN_MAP_LINEAR: - if (hwirq < domain->revmap_data.linear.size) - domain->revmap_data.linear.revmap[hwirq] = 0; - break; - case IRQ_DOMAIN_MAP_TREE: - mutex_lock(&revmap_trees_mutex); - radix_tree_delete(&domain->revmap_data.tree, hwirq); - mutex_unlock(&revmap_trees_mutex); - break; - } - + irq_domain_disassociate_many(domain, virq, 1); irq_free_desc(virq); } EXPORT_SYMBOL_GPL(irq_dispose_mapping); @@ -559,16 +691,11 @@ EXPORT_SYMBOL_GPL(irq_dispose_mapping); * irq_find_mapping() - Find a linux irq from an hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space - * - * This is a slow path, for use by generic code. It's expected that an - * irq controller implementation directly calls the appropriate low level - * mapping function. */ unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - unsigned int i; - unsigned int hint = hwirq % nr_irqs; + struct irq_data *data; /* Look for default domain if nececssary */ if (domain == NULL) @@ -576,115 +703,47 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (domain == NULL) return 0; - /* legacy -> bail early */ - if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY) + switch (domain->revmap_type) { + case IRQ_DOMAIN_MAP_LEGACY: return irq_domain_legacy_revmap(domain, hwirq); - - /* Slow path does a linear search of the map */ - if (hint == 0) - hint = 1; - i = hint; - do { - struct irq_data *data = irq_get_irq_data(i); + case IRQ_DOMAIN_MAP_LINEAR: + return irq_linear_revmap(domain, hwirq); + case IRQ_DOMAIN_MAP_TREE: + rcu_read_lock(); + data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); + rcu_read_unlock(); + if (data) + return data->irq; + break; + case IRQ_DOMAIN_MAP_NOMAP: + data = irq_get_irq_data(hwirq); if (data && (data->domain == domain) && (data->hwirq == hwirq)) - return i; - i++; - if (i >= nr_irqs) - i = 1; - } while(i != hint); + return hwirq; + break; + } + return 0; } EXPORT_SYMBOL_GPL(irq_find_mapping); /** - * irq_radix_revmap_lookup() - Find a linux irq from a hw irq number. - * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space - * - * This is a fast path, for use by irq controller code that uses radix tree - * revmaps - */ -unsigned int irq_radix_revmap_lookup(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - struct irq_data *irq_data; - - if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) - return irq_find_mapping(domain, hwirq); - - /* - * Freeing an irq can delete nodes along the path to - * do the lookup via call_rcu. - */ - rcu_read_lock(); - irq_data = radix_tree_lookup(&domain->revmap_data.tree, hwirq); - rcu_read_unlock(); - - /* - * If found in radix tree, then fine. - * Else fallback to linear lookup - this should not happen in practice - * as it means that we failed to insert the node in the radix tree. - */ - return irq_data ? irq_data->irq : irq_find_mapping(domain, hwirq); -} -EXPORT_SYMBOL_GPL(irq_radix_revmap_lookup); - -/** - * irq_radix_revmap_insert() - Insert a hw irq to linux irq number mapping. - * @domain: domain owning this hardware interrupt - * @virq: linux irq number - * @hwirq: hardware irq number in that domain space - * - * This is for use by irq controllers that use a radix tree reverse - * mapping for fast lookup. - */ -void irq_radix_revmap_insert(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) -{ - struct irq_data *irq_data = irq_get_irq_data(virq); - - if (WARN_ON(domain->revmap_type != IRQ_DOMAIN_MAP_TREE)) - return; - - if (virq) { - mutex_lock(&revmap_trees_mutex); - radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data); - mutex_unlock(&revmap_trees_mutex); - } -} -EXPORT_SYMBOL_GPL(irq_radix_revmap_insert); - -/** * irq_linear_revmap() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space * - * This is a fast path, for use by irq controller code that uses linear - * revmaps. It does fallback to the slow path if the revmap doesn't exist - * yet and will create the revmap entry with appropriate locking + * This is a fast path that can be called directly by irq controller code to + * save a handful of instructions. */ unsigned int irq_linear_revmap(struct irq_domain *domain, irq_hw_number_t hwirq) { - unsigned int *revmap; - - if (WARN_ON_ONCE(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR)) - return irq_find_mapping(domain, hwirq); + BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR); - /* Check revmap bounds */ - if (unlikely(hwirq >= domain->revmap_data.linear.size)) - return irq_find_mapping(domain, hwirq); - - /* Check if revmap was allocated */ - revmap = domain->revmap_data.linear.revmap; - if (unlikely(revmap == NULL)) - return irq_find_mapping(domain, hwirq); - - /* Fill up revmap with slow path if no mapping found */ - if (unlikely(!revmap[hwirq])) - revmap[hwirq] = irq_find_mapping(domain, hwirq); + /* Check revmap bounds; complain if exceeded */ + if (WARN_ON(hwirq >= domain->revmap_data.linear.size)) + return 0; - return revmap[hwirq]; + return domain->revmap_data.linear.revmap[hwirq]; } EXPORT_SYMBOL_GPL(irq_linear_revmap); @@ -761,12 +820,6 @@ static int __init irq_debugfs_init(void) __initcall(irq_debugfs_init); #endif /* CONFIG_IRQ_DOMAIN_DEBUG */ -static int irq_domain_simple_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - return 0; -} - /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings * @@ -829,7 +882,6 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); const struct irq_domain_ops irq_domain_simple_ops = { - .map = irq_domain_simple_map, .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 814c9ef6bba1..0a8e8f059627 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -893,22 +893,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) return -ENOSYS; if (!try_module_get(desc->owner)) return -ENODEV; - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & IRQF_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } /* * Check whether the interrupt nests into another interrupt @@ -1354,7 +1338,6 @@ EXPORT_SYMBOL(free_irq); * Flags: * * IRQF_SHARED Interrupt is shared - * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy * IRQF_TRIGGER_* Specify active edge(s) or level * */ diff --git a/kernel/kexec.c b/kernel/kexec.c index 4e2e472f6aeb..0668d58d6413 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1424,7 +1424,7 @@ static void update_vmcoreinfo_note(void) void crash_save_vmcoreinfo(void) { - vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); update_vmcoreinfo_note(); } diff --git a/kernel/kmod.c b/kernel/kmod.c index ff2c7cb86d77..6f99aead66c6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -45,6 +45,13 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +/* + * kmod_thread_locker is used for deadlock avoidance. There is no explicit + * locking to protect this global - it is private to the singleton khelper + * thread and should only ever be modified by that thread. + */ +static const struct task_struct *kmod_thread_locker; + #define CAP_BSET (void *)1 #define CAP_PI (void *)2 @@ -221,6 +228,13 @@ fail: return 0; } +static int call_helper(void *data) +{ + /* Worker thread started blocking khelper thread. */ + kmod_thread_locker = current; + return ____call_usermodehelper(data); +} + static void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) @@ -295,9 +309,12 @@ static void __call_usermodehelper(struct work_struct *work) if (wait == UMH_WAIT_PROC) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); - else - pid = kernel_thread(____call_usermodehelper, sub_info, + else { + pid = kernel_thread(call_helper, sub_info, CLONE_VFORK | SIGCHLD); + /* Worker thread stopped blocking khelper thread. */ + kmod_thread_locker = NULL; + } switch (wait) { case UMH_NO_WAIT: @@ -548,6 +565,16 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) retval = -EBUSY; goto out; } + /* + * Worker thread must not wait for khelper thread at below + * wait_for_completion() if the thread was created with CLONE_VFORK + * flag, for khelper thread is already waiting for the thread at + * wait_for_completion() in do_fork(). + */ + if (wait != UMH_NO_WAIT && current == kmod_thread_locker) { + retval = -EBUSY; + goto out; + } sub_info->complete = &done; sub_info->wait = wait; @@ -577,6 +604,12 @@ unlock: return retval; } +/* + * call_usermodehelper_fns() will not run the caller-provided cleanup function + * if a memory allocation failure is experienced. So the caller might need to + * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform + * the necessaary cleanup within the caller. + */ int call_usermodehelper_fns( char *path, char **argv, char **envp, int wait, int (*init)(struct subprocess_info *info, struct cred *new), diff --git a/kernel/panic.c b/kernel/panic.c index d2a5f4ecc6dd..e1b2822fff97 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -75,6 +75,14 @@ void panic(const char *fmt, ...) int state = 0; /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since + * there is nothing to prevent an interrupt handler (that runs + * after the panic_lock is acquired) from invoking panic again. + */ + local_irq_disable(); + + /* * It's possible to come here directly from a panic-assertion and * not have preempt disabled. Some functions called from here want * preempt to be disabled. No point enabling it later though... diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c8b7446b27df..1da39ea248fd 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -178,6 +178,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + /* Kick the lockup detector */ + lockup_detector_bootcpu_resume(); + Enable_cpus: enable_nonboot_cpus(); diff --git a/kernel/printk.c b/kernel/printk.c index 50c96b5651b6..6a76ab9d4476 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -389,8 +389,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, line = buf; for (i = 0; i < count; i++) { - if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) + if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) { + ret = -EFAULT; goto out; + } line += iv[i].iov_len; } @@ -1540,17 +1542,23 @@ asmlinkage int vprintk_emit(int facility, int level, lflags |= LOG_NEWLINE; } - /* strip syslog prefix and extract log level or control flags */ - if (text[0] == '<' && text[1] && text[2] == '>') { - switch (text[1]) { - case '0' ... '7': - if (level == -1) - level = text[1] - '0'; - case 'd': /* KERN_DEFAULT */ - lflags |= LOG_PREFIX; - case 'c': /* KERN_CONT */ - text += 3; - text_len -= 3; + /* strip kernel syslog prefix and extract log level or control flags */ + if (facility == 0) { + int kern_level = printk_get_level(text); + + if (kern_level) { + const char *end_of_header = printk_skip_level(text); + switch (kern_level) { + case '0' ... '7': + if (level == -1) + level = kern_level - '0'; + case 'd': /* KERN_DEFAULT */ + lflags |= LOG_PREFIX; + case 'c': /* KERN_CONT */ + break; + } + text_len -= end_of_header - text; + text = (char *)end_of_header; } } diff --git a/kernel/resource.c b/kernel/resource.c index dc8b47764443..34d45886ee84 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -7,6 +7,8 @@ * Arbitrary resource management. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/export.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -791,8 +793,28 @@ void __init reserve_region_with_split(struct resource *root, resource_size_t start, resource_size_t end, const char *name) { + int abort = 0; + write_lock(&resource_lock); - __reserve_region_with_split(root, start, end, name); + if (root->start > start || root->end < end) { + pr_err("requested range [0x%llx-0x%llx] not in root %pr\n", + (unsigned long long)start, (unsigned long long)end, + root); + if (start > root->end || end < root->start) + abort = 1; + else { + if (end > root->end) + end = root->end; + if (start < root->start) + start = root->start; + pr_err("fixing request to [0x%llx-0x%llx]\n", + (unsigned long long)start, + (unsigned long long)end); + } + dump_stack(); + } + if (!abort) + __reserve_region_with_split(root, start, end, name); write_unlock(&resource_lock); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5d011ef4c0df..d325c4b2dcbb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1910,12 +1910,12 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + trace_sched_switch(prev, next); sched_info_switch(prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); - trace_sched_switch(prev, next); } /** diff --git a/kernel/softirq.c b/kernel/softirq.c index 671f9594e368..b73e681df09e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -210,6 +210,14 @@ asmlinkage void __do_softirq(void) __u32 pending; int max_restart = MAX_SOFTIRQ_RESTART; int cpu; + unsigned long old_flags = current->flags; + + /* + * Mask out PF_MEMALLOC s current task context is borrowed for the + * softirq. A softirq handled such as network RX might set PF_MEMALLOC + * again if the socket is related to swap + */ + current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); account_system_vtime(current); @@ -265,6 +273,7 @@ restart: account_system_vtime(current); __local_bh_enable(SOFTIRQ_OFFSET); + tsk_restore_flags(current, old_flags, PF_MEMALLOC); } #ifndef __ARCH_HAS_DO_SOFTIRQ diff --git a/kernel/sys.c b/kernel/sys.c index 2d39a84cd857..241507f23eca 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2015,7 +2015,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; } me->pdeath_signal = arg2; - error = 0; break; case PR_GET_PDEATHSIG: error = put_user(me->pdeath_signal, (int __user *)arg2); @@ -2029,7 +2028,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; } set_dumpable(me->mm, arg2); - error = 0; break; case PR_SET_UNALIGN: @@ -2056,10 +2054,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; - else - error = 0; break; - case PR_SET_NAME: comm[sizeof(me->comm)-1] = 0; if (strncpy_from_user(comm, (char __user *)arg2, @@ -2067,20 +2062,19 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EFAULT; set_task_comm(me, comm); proc_comm_connector(me); - return 0; + break; case PR_GET_NAME: get_task_comm(comm, me); if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) return -EFAULT; - return 0; + break; case PR_GET_ENDIAN: error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: error = SET_ENDIAN(me, arg2); break; - case PR_GET_SECCOMP: error = prctl_get_seccomp(); break; @@ -2108,7 +2102,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; - error = 0; break; case PR_MCE_KILL: if (arg4 | arg5) @@ -2134,7 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, default: return -EINVAL; } - error = 0; break; case PR_MCE_KILL_GET: if (arg2 | arg3 | arg4 | arg5) @@ -2153,7 +2145,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; - error = 0; break; case PR_GET_CHILD_SUBREAPER: error = put_user(me->signal->is_child_subreaper, @@ -2195,46 +2186,52 @@ static void argv_cleanup(struct subprocess_info *info) argv_free(info->argv); } -/** - * orderly_poweroff - Trigger an orderly system poweroff - * @force: force poweroff if command execution fails - * - * This may be called from any context to trigger a system shutdown. - * If the orderly shutdown fails, it will force an immediate shutdown. - */ -int orderly_poweroff(bool force) +static int __orderly_poweroff(void) { int argc; - char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); + char **argv; static char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL }; - int ret = -ENOMEM; + int ret; + argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); if (argv == NULL) { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", __func__, poweroff_cmd); - goto out; + return -ENOMEM; } ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, NULL, argv_cleanup, NULL); -out: - if (likely(!ret)) - return 0; - if (ret == -ENOMEM) argv_free(argv); - if (force) { + return ret; +} + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + int ret = __orderly_poweroff(); + + if (ret && force) { printk(KERN_WARNING "Failed to start orderly shutdown: " "forcing the issue\n"); - /* I guess this should try to kick off some daemon to - sync and poweroff asap. Or not even bother syncing - if we're doing an emergency shutdown? */ + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap. Or not even bother syncing if we're doing an + * emergency shutdown? + */ emergency_sync(); kernel_power_off(); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb4..87174ef59161 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -30,6 +30,7 @@ #include <linux/security.h> #include <linux/ctype.h> #include <linux/kmemcheck.h> +#include <linux/kmemleak.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> @@ -174,6 +175,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; @@ -410,7 +416,7 @@ static struct ctl_table kern_table[] = { .data = core_pattern, .maxlen = CORENAME_MAX_SIZE, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_dostring_coredump, }, { .procname = "core_pipe_limit", @@ -1095,11 +1101,9 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { - .procname = "nr_pdflush_threads", - .data = &nr_pdflush_threads, - .maxlen = sizeof nr_pdflush_threads, - .mode = 0444 /* read-only*/, - .proc_handler = proc_dointvec, + .procname = "nr_pdflush_threads", + .mode = 0444 /* read-only */, + .proc_handler = pdflush_proc_obsolete, }, { .procname = "swappiness", @@ -1494,11 +1498,29 @@ static struct ctl_table fs_table[] = { #endif #endif { + .procname = "protected_symlinks", + .data = &sysctl_protected_symlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "protected_hardlinks", + .data = &sysctl_protected_hardlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "suid_dumpable", .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_coredump, .extra1 = &zero, .extra2 = &two, }, @@ -1551,7 +1573,10 @@ static struct ctl_table dev_table[] = { int __init sysctl_init(void) { - register_sysctl_table(sysctl_base_table); + struct ctl_table_header *hdr; + + hdr = register_sysctl_table(sysctl_base_table); + kmemleak_not_leak(hdr); return 0; } @@ -2009,6 +2034,34 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, ¶m); } +static void validate_coredump_safety(void) +{ + if (suid_dumpable == SUID_DUMPABLE_SAFE && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING "Unsafe core_pattern used with "\ + "suid_dumpable=2. Pipe handler or fully qualified "\ + "core dump path required.\n"); + } +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a1..65bdcf198d4e 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -147,7 +147,7 @@ static const struct bin_table bin_vm_table[] = { { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ - { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" }, + /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */ { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, /* VM_PAGEBUF unused */ /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e66046456f4f..d0a32796550f 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -436,6 +436,11 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); + if (na == NULL) { + rc = -EMSGSIZE; + goto err; + } + stats = nla_data(na); memset(stats, 0, sizeof(*stats)); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b1dfba70f7c..69add8a9da68 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -575,7 +575,7 @@ out: /* * Create/destroy watchdog threads as CPUs come and go: */ -static int __cpuinit +static int cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; @@ -610,10 +610,27 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cpu_nfb = { +static struct notifier_block cpu_nfb = { .notifier_call = cpu_callback }; +#ifdef CONFIG_SUSPEND +/* + * On exit from suspend we force an offline->online transition on the boot CPU + * so that the PMU state that was lost while in suspended state gets set up + * properly for the boot CPU. This information is required for restarting the + * NMI watchdog. + */ +void lockup_detector_bootcpu_resume(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + + cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu); + cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu); +} +#endif + void __init lockup_detector_init(void) { void *cpu = (void *)(long)smp_processor_id(); |