summaryrefslogtreecommitdiff
path: root/kernel/events/uprobes.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--kernel/events/uprobes.c233
1 files changed, 174 insertions, 59 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index fa04b14a7d72..bf2a87a0a378 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -28,6 +28,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/workqueue.h>
#include <linux/srcu.h>
+#include <linux/oom.h> /* check_stable_address_space */
#include <linux/uprobes.h>
@@ -416,7 +417,7 @@ static void update_ref_ctr_warn(struct uprobe *uprobe,
struct mm_struct *mm, short d)
{
pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
- "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+ "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n",
d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
(unsigned long long) uprobe->offset,
(unsigned long long) uprobe->ref_ctr_offset, mm);
@@ -1260,6 +1261,9 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
* returns NULL in find_active_uprobe_rcu().
*/
mmap_write_lock(mm);
+ if (check_stable_address_space(mm))
+ goto unlock;
+
vma = find_vma(mm, info->vaddr);
if (!vma || !valid_vma(vma, is_register) ||
file_inode(vma->vm_file) != uprobe->inode)
@@ -1888,9 +1892,33 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
return instruction_pointer(regs);
}
-static struct return_instance *free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
+static void ri_pool_push(struct uprobe_task *utask, struct return_instance *ri)
+{
+ ri->cons_cnt = 0;
+ ri->next = utask->ri_pool;
+ utask->ri_pool = ri;
+}
+
+static struct return_instance *ri_pool_pop(struct uprobe_task *utask)
{
- struct return_instance *next = ri->next;
+ struct return_instance *ri = utask->ri_pool;
+
+ if (likely(ri))
+ utask->ri_pool = ri->next;
+
+ return ri;
+}
+
+static void ri_free(struct return_instance *ri)
+{
+ kfree(ri->extra_consumers);
+ kfree_rcu(ri, rcu);
+}
+
+static void free_ret_instance(struct uprobe_task *utask,
+ struct return_instance *ri, bool cleanup_hprobe)
+{
+ unsigned seq;
if (cleanup_hprobe) {
enum hprobe_state hstate;
@@ -1899,8 +1927,22 @@ static struct return_instance *free_ret_instance(struct return_instance *ri, boo
hprobe_finalize(&ri->hprobe, hstate);
}
- kfree_rcu(ri, rcu);
- return next;
+ /*
+ * At this point return_instance is unlinked from utask's
+ * return_instances list and this has become visible to ri_timer().
+ * If seqcount now indicates that ri_timer's return instance
+ * processing loop isn't active, we can return ri into the pool of
+ * to-be-reused return instances for future uretprobes. If ri_timer()
+ * happens to be running right now, though, we fallback to safety and
+ * just perform RCU-delated freeing of ri.
+ */
+ if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
+ /* immediate reuse of ri without RCU GP is OK */
+ ri_pool_push(utask, ri);
+ } else {
+ /* we might be racing with ri_timer(), so play it safe */
+ ri_free(ri);
+ }
}
/*
@@ -1910,21 +1952,32 @@ static struct return_instance *free_ret_instance(struct return_instance *ri, boo
void uprobe_free_utask(struct task_struct *t)
{
struct uprobe_task *utask = t->utask;
- struct return_instance *ri;
+ struct return_instance *ri, *ri_next;
if (!utask)
return;
+ t->utask = NULL;
WARN_ON_ONCE(utask->active_uprobe || utask->xol_vaddr);
timer_delete_sync(&utask->ri_timer);
ri = utask->return_instances;
- while (ri)
- ri = free_ret_instance(ri, true /* cleanup_hprobe */);
+ while (ri) {
+ ri_next = ri->next;
+ free_ret_instance(utask, ri, true /* cleanup_hprobe */);
+ ri = ri_next;
+ }
+
+ /* free_ret_instance() above might add to ri_pool, so this loop should come last */
+ ri = utask->ri_pool;
+ while (ri) {
+ ri_next = ri->next;
+ ri_free(ri);
+ ri = ri_next;
+ }
kfree(utask);
- t->utask = NULL;
}
#define RI_TIMER_PERIOD (HZ / 10) /* 100 ms */
@@ -1942,8 +1995,12 @@ static void ri_timer(struct timer_list *timer)
/* RCU protects return_instance from freeing. */
guard(rcu)();
+ write_seqcount_begin(&utask->ri_seqcount);
+
for_each_ret_instance_rcu(ri, utask->return_instances)
hprobe_expire(&ri->hprobe, false);
+
+ write_seqcount_end(&utask->ri_seqcount);
}
static struct uprobe_task *alloc_utask(void)
@@ -1955,6 +2012,7 @@ static struct uprobe_task *alloc_utask(void)
return NULL;
timer_setup(&utask->ri_timer, ri_timer, 0);
+ seqcount_init(&utask->ri_seqcount);
return utask;
}
@@ -1974,32 +2032,40 @@ static struct uprobe_task *get_utask(void)
return current->utask;
}
-static size_t ri_size(int consumers_cnt)
+static struct return_instance *alloc_return_instance(struct uprobe_task *utask)
{
struct return_instance *ri;
- return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
-}
-
-#define DEF_CNT 4
-
-static struct return_instance *alloc_return_instance(void)
-{
- struct return_instance *ri;
+ ri = ri_pool_pop(utask);
+ if (ri)
+ return ri;
- ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
+ ri = kzalloc(sizeof(*ri), GFP_KERNEL);
if (!ri)
return ZERO_SIZE_PTR;
- ri->consumers_cnt = DEF_CNT;
return ri;
}
static struct return_instance *dup_return_instance(struct return_instance *old)
{
- size_t size = ri_size(old->consumers_cnt);
+ struct return_instance *ri;
+
+ ri = kmemdup(old, sizeof(*ri), GFP_KERNEL);
+ if (!ri)
+ return NULL;
+
+ if (unlikely(old->cons_cnt > 1)) {
+ ri->extra_consumers = kmemdup(old->extra_consumers,
+ sizeof(ri->extra_consumers[0]) * (old->cons_cnt - 1),
+ GFP_KERNEL);
+ if (!ri->extra_consumers) {
+ kfree(ri);
+ return NULL;
+ }
+ }
- return kmemdup(old, size, GFP_KERNEL);
+ return ri;
}
static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
@@ -2108,14 +2174,17 @@ unsigned long uprobe_get_trampoline_vaddr(void)
static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
struct pt_regs *regs)
{
- struct return_instance *ri = utask->return_instances;
+ struct return_instance *ri = utask->return_instances, *ri_next;
enum rp_check ctx = chained ? RP_CHECK_CHAIN_CALL : RP_CHECK_CALL;
while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) {
- ri = free_ret_instance(ri, true /* cleanup_hprobe */);
+ ri_next = ri->next;
+ rcu_assign_pointer(utask->return_instances, ri_next);
utask->depth--;
+
+ free_ret_instance(utask, ri, true /* cleanup_hprobe */);
+ ri = ri_next;
}
- rcu_assign_pointer(utask->return_instances, ri);
}
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
@@ -2180,7 +2249,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
return;
free:
- kfree(ri);
+ ri_free(ri);
}
/* Prepare to single-step probed instruction out of line. */
@@ -2294,6 +2363,47 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
return is_trap_insn(&opcode);
}
+static struct uprobe *find_active_uprobe_speculative(unsigned long bp_vaddr)
+{
+ struct mm_struct *mm = current->mm;
+ struct uprobe *uprobe = NULL;
+ struct vm_area_struct *vma;
+ struct file *vm_file;
+ loff_t offset;
+ unsigned int seq;
+
+ guard(rcu)();
+
+ if (!mmap_lock_speculate_try_begin(mm, &seq))
+ return NULL;
+
+ vma = vma_lookup(mm, bp_vaddr);
+ if (!vma)
+ return NULL;
+
+ /*
+ * vm_file memory can be reused for another instance of struct file,
+ * but can't be freed from under us, so it's safe to read fields from
+ * it, even if the values are some garbage values; ultimately
+ * find_uprobe_rcu() + mmap_lock_speculation_end() check will ensure
+ * that whatever we speculatively found is correct
+ */
+ vm_file = READ_ONCE(vma->vm_file);
+ if (!vm_file)
+ return NULL;
+
+ offset = (loff_t)(vma->vm_pgoff << PAGE_SHIFT) + (bp_vaddr - vma->vm_start);
+ uprobe = find_uprobe_rcu(vm_file->f_inode, offset);
+ if (!uprobe)
+ return NULL;
+
+ /* now double check that nothing about MM changed */
+ if (mmap_lock_speculate_retry(mm, seq))
+ return NULL;
+
+ return uprobe;
+}
+
/* assumes being inside RCU protected region */
static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
{
@@ -2301,10 +2411,14 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
struct uprobe *uprobe = NULL;
struct vm_area_struct *vma;
+ uprobe = find_active_uprobe_speculative(bp_vaddr);
+ if (uprobe)
+ return uprobe;
+
mmap_read_lock(mm);
vma = vma_lookup(mm, bp_vaddr);
if (vma) {
- if (valid_vma(vma, false)) {
+ if (vma->vm_file) {
struct inode *inode = file_inode(vma->vm_file);
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
@@ -2324,25 +2438,27 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
return uprobe;
}
-static struct return_instance*
-push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
+static struct return_instance *push_consumer(struct return_instance *ri, __u64 id, __u64 cookie)
{
+ struct return_consumer *ric;
+
if (unlikely(ri == ZERO_SIZE_PTR))
return ri;
- if (unlikely(idx >= ri->consumers_cnt)) {
- struct return_instance *old_ri = ri;
-
- ri->consumers_cnt += DEF_CNT;
- ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
- if (!ri) {
- kfree(old_ri);
+ if (unlikely(ri->cons_cnt > 0)) {
+ ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL);
+ if (!ric) {
+ ri_free(ri);
return ZERO_SIZE_PTR;
}
+ ri->extra_consumers = ric;
}
- ri->consumers[idx].id = id;
- ri->consumers[idx].cookie = cookie;
+ ric = likely(ri->cons_cnt == 0) ? &ri->consumer : &ri->extra_consumers[ri->cons_cnt - 1];
+ ric->id = id;
+ ric->cookie = cookie;
+
+ ri->cons_cnt++;
return ri;
}
@@ -2350,14 +2466,17 @@ static struct return_consumer *
return_consumer_find(struct return_instance *ri, int *iter, int id)
{
struct return_consumer *ric;
- int idx = *iter;
+ int idx;
- for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
+ for (idx = *iter; idx < ri->cons_cnt; idx++)
+ {
+ ric = likely(idx == 0) ? &ri->consumer : &ri->extra_consumers[idx - 1];
if (ric->id == id) {
*iter = idx + 1;
return ric;
}
}
+
return NULL;
}
@@ -2371,9 +2490,9 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
struct uprobe_consumer *uc;
bool has_consumers = false, remove = true;
struct return_instance *ri = NULL;
- int push_idx = 0;
+ struct uprobe_task *utask = current->utask;
- current->utask->auprobe = &uprobe->arch;
+ utask->auprobe = &uprobe->arch;
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
bool session = uc->handler && uc->ret_handler;
@@ -2393,21 +2512,15 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
continue;
if (!ri)
- ri = alloc_return_instance();
+ ri = alloc_return_instance(utask);
if (session)
- ri = push_consumer(ri, push_idx++, uc->id, cookie);
+ ri = push_consumer(ri, uc->id, cookie);
}
- current->utask->auprobe = NULL;
+ utask->auprobe = NULL;
- if (!ZERO_OR_NULL_PTR(ri)) {
- /*
- * The push_idx value has the final number of return consumers,
- * and ri->consumers_cnt has number of allocated consumers.
- */
- ri->consumers_cnt = push_idx;
+ if (!ZERO_OR_NULL_PTR(ri))
prepare_uretprobe(uprobe, regs, ri);
- }
if (remove && has_consumers) {
down_read(&uprobe->register_rwsem);
@@ -2461,7 +2574,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri)
void uprobe_handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
- struct return_instance *ri, *next;
+ struct return_instance *ri, *ri_next, *next_chain;
struct uprobe *uprobe;
enum hprobe_state hstate;
bool valid;
@@ -2481,8 +2594,8 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
* or NULL; the latter case means that nobody but ri->func
* could hit this trampoline on return. TODO: sigaltstack().
*/
- next = find_next_ret_chain(ri);
- valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs);
+ next_chain = find_next_ret_chain(ri);
+ valid = !next_chain || arch_uretprobe_is_alive(next_chain, RP_CHECK_RET, regs);
instruction_pointer_set(regs, ri->orig_ret_vaddr);
do {
@@ -2494,7 +2607,9 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
* trampoline addresses on the stack are replaced with correct
* original return addresses
*/
- rcu_assign_pointer(utask->return_instances, ri->next);
+ ri_next = ri->next;
+ rcu_assign_pointer(utask->return_instances, ri_next);
+ utask->depth--;
uprobe = hprobe_consume(&ri->hprobe, &hstate);
if (valid)
@@ -2502,9 +2617,9 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
hprobe_finalize(&ri->hprobe, hstate);
/* We already took care of hprobe, no need to waste more time on that. */
- ri = free_ret_instance(ri, false /* !cleanup_hprobe */);
- utask->depth--;
- } while (ri != next);
+ free_ret_instance(utask, ri, false /* !cleanup_hprobe */);
+ ri = ri_next;
+ } while (ri != next_chain);
} while (!valid);
return;