summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c7
-rw-r--r--kernel/bpf/syscall.c21
-rw-r--r--kernel/bpf/verifier.c21
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/kcov.c4
-rw-r--r--kernel/power/hibernate.c7
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/fair.c49
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--kernel/trace/bpf_trace.c100
-rw-r--r--kernel/trace/ftrace_internal.h22
-rw-r--r--kernel/trace/preemptirq_delay_test.c38
-rw-r--r--kernel/trace/ring_buffer.c34
-rw-r--r--kernel/trace/trace.c16
-rw-r--r--kernel/trace/trace_boot.c20
-rw-r--r--kernel/trace/trace_kprobe.c8
-rw-r--r--kernel/umh.c11
17 files changed, 250 insertions, 126 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 95d77770353c..1d6120fd5ba6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -486,7 +486,12 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
if (!(map->map_flags & BPF_F_MMAPABLE))
return -EINVAL;
- return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff);
+ if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
+ PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
+ return -EINVAL;
+
+ return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
+ vma->vm_pgoff + pgoff);
}
const struct bpf_map_ops array_map_ops = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7626b8024471..4e6dee19a668 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -623,9 +623,20 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
mutex_lock(&map->freeze_mutex);
- if ((vma->vm_flags & VM_WRITE) && map->frozen) {
- err = -EPERM;
- goto out;
+ if (vma->vm_flags & VM_WRITE) {
+ if (map->frozen) {
+ err = -EPERM;
+ goto out;
+ }
+ /* map is meant to be read-only, so do not allow mapping as
+ * writable, because it's possible to leak a writable page
+ * reference and allows user-space to still modify it after
+ * freezing, while verifier will assume contents do not change
+ */
+ if (map->map_flags & BPF_F_RDONLY_PROG) {
+ err = -EACCES;
+ goto out;
+ }
}
/* set default open/close callbacks */
@@ -1485,8 +1496,10 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
if (err)
goto free_value;
- if (copy_to_user(uvalue, value, value_size) != 0)
+ if (copy_to_user(uvalue, value, value_size) != 0) {
+ err = -EFAULT;
goto free_value;
+ }
err = 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa1d8245b925..8d7ee40e2748 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4340,7 +4340,9 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
if (ret_type != RET_INTEGER ||
(func_id != BPF_FUNC_get_stack &&
- func_id != BPF_FUNC_probe_read_str))
+ func_id != BPF_FUNC_probe_read_str &&
+ func_id != BPF_FUNC_probe_read_kernel_str &&
+ func_id != BPF_FUNC_probe_read_user_str))
return;
ret_reg->smax_value = meta->msize_max_value;
@@ -7059,6 +7061,23 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
range = tnum_const(0);
break;
+ case BPF_PROG_TYPE_TRACING:
+ switch (env->prog->expected_attach_type) {
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ range = tnum_const(0);
+ break;
+ case BPF_TRACE_RAW_TP:
+ case BPF_MODIFY_RETURN:
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+ break;
+ case BPF_PROG_TYPE_EXT:
+ /* freplace program can return anything as its return value
+ * depends on the to-be-replaced kernel func or bpf program.
+ */
default:
return 0;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..48ed22774efa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2486,11 +2486,11 @@ long do_fork(unsigned long clone_flags,
int __user *child_tidptr)
{
struct kernel_clone_args args = {
- .flags = (clone_flags & ~CSIGNAL),
+ .flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
- .exit_signal = (clone_flags & CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
.stack = stack_start,
.stack_size = stack_size,
};
@@ -2508,8 +2508,9 @@ long do_fork(unsigned long clone_flags,
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct kernel_clone_args args = {
- .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
- .exit_signal = (flags & CSIGNAL),
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
.stack = (unsigned long)fn,
.stack_size = (unsigned long)arg,
};
@@ -2570,11 +2571,11 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
#endif
{
struct kernel_clone_args args = {
- .flags = (clone_flags & ~CSIGNAL),
+ .flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
.pidfd = parent_tidptr,
.child_tid = child_tidptr,
.parent_tid = parent_tidptr,
- .exit_signal = (clone_flags & CSIGNAL),
+ .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
.stack = newsp,
.tls = tls,
};
diff --git a/kernel/kcov.c b/kernel/kcov.c
index f50354202dbe..8accc9722a81 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
* kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
* arbitrary 4-byte non-zero number as the instance id). This common handle
* then gets saved into the task_struct of the process that issued the
- * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
- * kernel threads, the common handle must be retrived via kcov_common_handle()
+ * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
+ * kernel threads, the common handle must be retrieved via kcov_common_handle()
* and passed to the spawned threads via custom annotations. Those kernel
* threads must in turn be annotated with kcov_remote_start(common_handle) and
* kcov_remote_stop(). All of the threads that are spawned by the same process
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 86aba8706b16..30bd28d1d418 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -898,6 +898,13 @@ static int software_resume(void)
error = freeze_processes();
if (error)
goto Close_Finish;
+
+ error = freeze_kernel_threads();
+ if (error) {
+ thaw_processes();
+ goto Close_Finish;
+ }
+
error = load_image_and_restore();
thaw_processes();
Finish:
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index a562df57a86e..239970b991c0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -948,8 +948,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P(se.avg.util_est.enqueued);
#endif
#ifdef CONFIG_UCLAMP_TASK
- __PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
- __PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
+ __PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
+ __PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..538ba5d94e99 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4774,7 +4774,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
struct rq *rq = rq_of(cfs_rq);
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
- int enqueue = 1;
long task_delta, idle_task_delta;
se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4798,26 +4797,44 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
idle_task_delta = cfs_rq->idle_h_nr_running;
for_each_sched_entity(se) {
if (se->on_rq)
- enqueue = 0;
+ break;
+ cfs_rq = cfs_rq_of(se);
+ enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+
+ cfs_rq->h_nr_running += task_delta;
+ cfs_rq->idle_h_nr_running += idle_task_delta;
+
+ /* end evaluation on encountering a throttled cfs_rq */
+ if (cfs_rq_throttled(cfs_rq))
+ goto unthrottle_throttle;
+ }
+ for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- if (enqueue) {
- enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
- } else {
- update_load_avg(cfs_rq, se, 0);
- se_update_runnable(se);
- }
+
+ update_load_avg(cfs_rq, se, UPDATE_TG);
+ se_update_runnable(se);
cfs_rq->h_nr_running += task_delta;
cfs_rq->idle_h_nr_running += idle_task_delta;
+
+ /* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
- break;
+ goto unthrottle_throttle;
+
+ /*
+ * One parent has been throttled and cfs_rq removed from the
+ * list. Add it back to not break the leaf list.
+ */
+ if (throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
}
- if (!se)
- add_nr_running(rq, task_delta);
+ /* At this point se is NULL and we are at root level*/
+ add_nr_running(rq, task_delta);
+unthrottle_throttle:
/*
* The cfs_rq_throttled() breaks in the above iteration can result in
* incomplete leaf list maintenance, resulting in triggering the
@@ -4826,7 +4843,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- list_add_leaf_cfs_rq(cfs_rq);
+ if (list_add_leaf_cfs_rq(cfs_rq))
+ break;
}
assert_list_leaf_cfs_rq(rq);
@@ -5479,6 +5497,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
+
+ /*
+ * One parent has been throttled and cfs_rq removed from the
+ * list. Add it back to not break the leaf list.
+ */
+ if (throttled_hierarchy(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
}
enqueue_throttle:
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 402eef84c859..743647005f64 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
- imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1796747a77..a010edc37ee0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -323,17 +323,15 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
/*
* Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s
*/
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
+ int i, mod[3] = {}, fmt_cnt = 0;
+ char buf[64], fmt_ptype;
+ void *unsafe_ptr = NULL;
bool str_seen = false;
- int mod[3] = {};
- int fmt_cnt = 0;
- u64 unsafe_addr;
- char buf[64];
- int i;
/*
* bpf_check()->check_func_arg()->check_stack_boundary()
@@ -359,40 +357,71 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
if (fmt[i] == 'l') {
mod[fmt_cnt]++;
i++;
- } else if (fmt[i] == 'p' || fmt[i] == 's') {
+ } else if (fmt[i] == 'p') {
mod[fmt_cnt]++;
+ if ((fmt[i + 1] == 'k' ||
+ fmt[i + 1] == 'u') &&
+ fmt[i + 2] == 's') {
+ fmt_ptype = fmt[i + 1];
+ i += 2;
+ goto fmt_str;
+ }
+
/* disallow any further format extensions */
if (fmt[i + 1] != 0 &&
!isspace(fmt[i + 1]) &&
!ispunct(fmt[i + 1]))
return -EINVAL;
- fmt_cnt++;
- if (fmt[i] == 's') {
- if (str_seen)
- /* allow only one '%s' per fmt string */
- return -EINVAL;
- str_seen = true;
-
- switch (fmt_cnt) {
- case 1:
- unsafe_addr = arg1;
- arg1 = (long) buf;
- break;
- case 2:
- unsafe_addr = arg2;
- arg2 = (long) buf;
- break;
- case 3:
- unsafe_addr = arg3;
- arg3 = (long) buf;
- break;
- }
- buf[0] = 0;
- strncpy_from_unsafe(buf,
- (void *) (long) unsafe_addr,
+
+ goto fmt_next;
+ } else if (fmt[i] == 's') {
+ mod[fmt_cnt]++;
+ fmt_ptype = fmt[i];
+fmt_str:
+ if (str_seen)
+ /* allow only one '%s' per fmt string */
+ return -EINVAL;
+ str_seen = true;
+
+ if (fmt[i + 1] != 0 &&
+ !isspace(fmt[i + 1]) &&
+ !ispunct(fmt[i + 1]))
+ return -EINVAL;
+
+ switch (fmt_cnt) {
+ case 0:
+ unsafe_ptr = (void *)(long)arg1;
+ arg1 = (long)buf;
+ break;
+ case 1:
+ unsafe_ptr = (void *)(long)arg2;
+ arg2 = (long)buf;
+ break;
+ case 2:
+ unsafe_ptr = (void *)(long)arg3;
+ arg3 = (long)buf;
+ break;
+ }
+
+ buf[0] = 0;
+ switch (fmt_ptype) {
+ case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ strncpy_from_unsafe(buf, unsafe_ptr,
sizeof(buf));
+ break;
+#endif
+ case 'k':
+ strncpy_from_unsafe_strict(buf, unsafe_ptr,
+ sizeof(buf));
+ break;
+ case 'u':
+ strncpy_from_unsafe_user(buf,
+ (__force void __user *)unsafe_ptr,
+ sizeof(buf));
+ break;
}
- continue;
+ goto fmt_next;
}
if (fmt[i] == 'l') {
@@ -403,6 +432,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
if (fmt[i] != 'i' && fmt[i] != 'd' &&
fmt[i] != 'u' && fmt[i] != 'x')
return -EINVAL;
+fmt_next:
fmt_cnt++;
}
@@ -825,14 +855,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
return &bpf_probe_read_kernel_proto;
- case BPF_FUNC_probe_read:
- return &bpf_probe_read_compat_proto;
case BPF_FUNC_probe_read_user_str:
return &bpf_probe_read_user_str_proto;
case BPF_FUNC_probe_read_kernel_str:
return &bpf_probe_read_kernel_str_proto;
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ case BPF_FUNC_probe_read:
+ return &bpf_probe_read_compat_proto;
case BPF_FUNC_probe_read_str:
return &bpf_probe_read_compat_str_proto;
+#endif
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
index 0456e0a3dab1..382775edf690 100644
--- a/kernel/trace/ftrace_internal.h
+++ b/kernel/trace/ftrace_internal.h
@@ -4,28 +4,6 @@
#ifdef CONFIG_FUNCTION_TRACER
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw_check() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw_check() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list) \
- op = rcu_dereference_raw_check(list); \
- do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op) \
- while (likely(op = rcu_dereference_raw_check((op)->next)) && \
- unlikely((op) != &ftrace_list_end))
-
-extern struct ftrace_ops __rcu *ftrace_ops_list;
-extern struct ftrace_ops ftrace_list_end;
extern struct mutex ftrace_lock;
extern struct ftrace_ops global_ops;
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 31c0fad4cb9e..312d1a0ca3b6 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -16,6 +16,7 @@
#include <linux/printk.h>
#include <linux/string.h>
#include <linux/sysfs.h>
+#include <linux/completion.h>
static ulong delay = 100;
static char test_mode[12] = "irq";
@@ -28,6 +29,8 @@ MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)");
MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)");
MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)");
+static struct completion done;
+
#define MIN(x, y) ((x) < (y) ? (x) : (y))
static void busy_wait(ulong time)
@@ -113,22 +116,47 @@ static int preemptirq_delay_run(void *data)
for (i = 0; i < s; i++)
(testfuncs[i])(i);
+
+ complete(&done);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ __set_current_state(TASK_RUNNING);
+
return 0;
}
-static struct task_struct *preemptirq_start_test(void)
+static int preemptirq_run_test(void)
{
+ struct task_struct *task;
char task_name[50];
+ init_completion(&done);
+
snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
- return kthread_run(preemptirq_delay_run, NULL, task_name);
+ task = kthread_run(preemptirq_delay_run, NULL, task_name);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ if (task) {
+ wait_for_completion(&done);
+ kthread_stop(task);
+ }
+ return 0;
}
static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
- preemptirq_start_test();
+ ssize_t ret;
+
+ ret = preemptirq_run_test();
+ if (ret)
+ return ret;
return count;
}
@@ -148,11 +176,9 @@ static struct kobject *preemptirq_delay_kobj;
static int __init preemptirq_delay_init(void)
{
- struct task_struct *test_task;
int retval;
- test_task = preemptirq_start_test();
- retval = PTR_ERR_OR_ZERO(test_task);
+ retval = preemptirq_run_test();
if (retval != 0)
return retval;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6f0b42ceeb00..b8e1ca48be50 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -193,7 +193,7 @@ rb_event_length(struct ring_buffer_event *event)
case RINGBUF_TYPE_DATA:
return rb_event_data_length(event);
default:
- BUG();
+ WARN_ON_ONCE(1);
}
/* not hit */
return 0;
@@ -249,7 +249,7 @@ rb_event_data(struct ring_buffer_event *event)
{
if (extended_time(event))
event = skip_time_extend(event);
- BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+ WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
return (void *)&event->array[0];
@@ -3727,7 +3727,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
return;
default:
- BUG();
+ RB_WARN_ON(cpu_buffer, 1);
}
return;
}
@@ -3757,7 +3757,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
return;
default:
- BUG();
+ RB_WARN_ON(iter->cpu_buffer, 1);
}
return;
}
@@ -4020,7 +4020,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
return event;
default:
- BUG();
+ RB_WARN_ON(cpu_buffer, 1);
}
return NULL;
@@ -4034,7 +4034,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int nr_loops = 0;
- bool failed = false;
if (ts)
*ts = 0;
@@ -4056,19 +4055,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return NULL;
/*
- * We repeat when a time extend is encountered or we hit
- * the end of the page. Since the time extend is always attached
- * to a data event, we should never loop more than three times.
- * Once for going to next page, once on time extend, and
- * finally once to get the event.
- * We should never hit the following condition more than thrice,
- * unless the buffer is very small, and there's a writer
- * that is causing the reader to fail getting an event.
+ * As the writer can mess with what the iterator is trying
+ * to read, just give up if we fail to get an event after
+ * three tries. The iterator is not as reliable when reading
+ * the ring buffer with an active write as the consumer is.
+ * Do not warn if the three failures is reached.
*/
- if (++nr_loops > 3) {
- RB_WARN_ON(cpu_buffer, !failed);
+ if (++nr_loops > 3)
return NULL;
- }
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
@@ -4079,10 +4073,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
}
event = rb_iter_head_event(iter);
- if (!event) {
- failed = true;
+ if (!event)
goto again;
- }
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
@@ -4117,7 +4109,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return event;
default:
- BUG();
+ RB_WARN_ON(cpu_buffer, 1);
}
return NULL;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8d2b98812625..29615f15a820 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str)
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
-void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
+static void tracing_snapshot_instance_cond(struct trace_array *tr,
+ void *cond_data)
{
struct tracer *tracer = tr->current_trace;
unsigned long flags;
@@ -8525,6 +8526,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
*/
allocate_snapshot = false;
#endif
+
+ /*
+ * Because of some magic with the way alloc_percpu() works on
+ * x86_64, we need to synchronize the pgd of all the tables,
+ * otherwise the trace events that happen in x86_64 page fault
+ * handlers can't cope with accessing the chance that a
+ * alloc_percpu()'d memory might be touched in the page fault trace
+ * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
+ * calls in tracing, because something might get triggered within a
+ * page fault trace event!
+ */
+ vmalloc_sync_mappings();
+
return 0;
}
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 06d7feb5255f..9de29bb45a27 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -95,24 +95,20 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
struct xbc_node *anode;
char buf[MAX_BUF_LEN];
const char *val;
- int ret;
+ int ret = 0;
- kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
+ xbc_node_for_each_array_value(node, "probes", anode, val) {
+ kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
- ret = kprobe_event_gen_cmd_start(&cmd, event, NULL);
- if (ret)
- return ret;
+ ret = kprobe_event_gen_cmd_start(&cmd, event, val);
+ if (ret)
+ break;
- xbc_node_for_each_array_value(node, "probes", anode, val) {
- ret = kprobe_event_add_field(&cmd, val);
+ ret = kprobe_event_gen_cmd_end(&cmd);
if (ret)
- return ret;
+ pr_err("Failed to add probe: %s\n", buf);
}
- ret = kprobe_event_gen_cmd_end(&cmd);
- if (ret)
- pr_err("Failed to add probe: %s\n", buf);
-
return ret;
}
#else
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d0568af4a0ef..35989383ae11 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr)
static bool within_notrace_func(struct trace_kprobe *tk)
{
- unsigned long addr = addr = trace_kprobe_address(tk);
+ unsigned long addr = trace_kprobe_address(tk);
char symname[KSYM_NAME_LEN], *p;
if (!__within_notrace_func(addr))
@@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init);
* complete command or only the first part of it; in the latter case,
* kprobe_event_add_fields() can be used to add more fields following this.
*
+ * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This
+ * returns -EINVAL if @loc == NULL.
+ *
* Return: 0 if successful, error otherwise.
*/
int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
@@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
if (cmd->type != DYNEVENT_TYPE_KPROBE)
return -EINVAL;
+ if (!loc)
+ return -EINVAL;
+
if (kretprobe)
snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name);
else
diff --git a/kernel/umh.c b/kernel/umh.c
index 7f255b5a8845..3474d6aa55d8 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -475,6 +475,12 @@ static void umh_clean_and_save_pid(struct subprocess_info *info)
{
struct umh_info *umh_info = info->data;
+ /* cleanup if umh_pipe_setup() was successful but exec failed */
+ if (info->pid && info->retval) {
+ fput(umh_info->pipe_to_umh);
+ fput(umh_info->pipe_from_umh);
+ }
+
argv_free(info->argv);
umh_info->pid = info->pid;
}
@@ -544,6 +550,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob);
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of system workqueues.
* (ie. it runs with full root capabilities and optimized affinity).
+ *
+ * Note: successful return value does not guarantee the helper was called at
+ * all. You can't rely on sub_info->{init,cleanup} being called even for
+ * UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers
+ * into a successful no-op.
*/
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{