summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-06-04 02:03:05 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-06-04 02:03:05 +0300
commit1ec6574a3c0a22c130c08e8c36c825cb87d68f8e (patch)
tree1f064c4a4965b9b0dd213456649b23e066d54bea
parent1888e9b4bb78c88514b24ecafa9e4e4faf761747 (diff)
parentb3f9916d81e8ffb21cbe7abccf63f86a5a1d598a (diff)
downloadlinux-1ec6574a3c0a22c130c08e8c36c825cb87d68f8e.tar.xz
Merge tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull kthread updates from Eric Biederman: "This updates init and user mode helper tasks to be ordinary user mode tasks. Commit 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") caused init and the user mode helper threads that call kernel_execve to have struct kthread allocated for them. This struct kthread going away during execve in turned made a use after free of struct kthread possible. Here, commit 343f4c49f243 ("kthread: Don't allocate kthread_struct for init and umh") is enough to fix the use after free and is simple enough to be backportable. The rest of the changes pass struct kernel_clone_args to clean things up and cause the code to make sense. In making init and the user mode helpers tasks purely user mode tasks I ran into two complications. The function task_tick_numa was detecting tasks without an mm by testing for the presence of PF_KTHREAD. The initramfs code in populate_initrd_image was using flush_delayed_fput to ensuere the closing of all it's file descriptors was complete, and flush_delayed_fput does not work in a userspace thread. I have looked and looked and more complications and in my code review I have not found any, and neither has anyone else with the code sitting in linux-next" * tag 'kthread-cleanups-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: sched: Update task_tick_numa to ignore tasks without an mm fork: Stop allowing kthreads to call execve fork: Explicitly set PF_KTHREAD init: Deal with the init process being a user mode process fork: Generalize PF_IO_WORKER handling fork: Explicity test for idle tasks in copy_thread fork: Pass struct kernel_clone_args into copy_thread kthread: Don't allocate kthread_struct for init and umh
-rw-r--r--arch/alpha/kernel/process.c13
-rw-r--r--arch/arc/kernel/process.c13
-rw-r--r--arch/arm/kernel/process.c12
-rw-r--r--arch/arm64/kernel/process.c12
-rw-r--r--arch/csky/kernel/process.c15
-rw-r--r--arch/hexagon/kernel/process.c12
-rw-r--r--arch/ia64/kernel/process.c15
-rw-r--r--arch/m68k/kernel/process.c12
-rw-r--r--arch/microblaze/kernel/process.c12
-rw-r--r--arch/mips/kernel/process.c13
-rw-r--r--arch/nios2/kernel/process.c12
-rw-r--r--arch/openrisc/kernel/process.c12
-rw-r--r--arch/parisc/kernel/process.c18
-rw-r--r--arch/powerpc/kernel/process.c15
-rw-r--r--arch/riscv/kernel/process.c12
-rw-r--r--arch/s390/kernel/process.c12
-rw-r--r--arch/sh/kernel/process_32.c12
-rw-r--r--arch/sparc/kernel/process_32.c12
-rw-r--r--arch/sparc/kernel/process_64.c12
-rw-r--r--arch/um/kernel/process.c15
-rw-r--r--arch/x86/include/asm/fpu/sched.h2
-rw-r--r--arch/x86/include/asm/switch_to.h8
-rw-r--r--arch/x86/kernel/fpu/core.c4
-rw-r--r--arch/x86/kernel/process.c18
-rw-r--r--arch/xtensa/kernel/process.c17
-rw-r--r--fs/exec.c8
-rw-r--r--include/linux/sched/task.h8
-rw-r--r--init/initramfs.c2
-rw-r--r--init/main.c2
-rw-r--r--kernel/fork.c46
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/umh.c6
32 files changed, 229 insertions, 155 deletions
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 5f8527081da9..6cbba7370b4e 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -233,10 +233,11 @@ release_thread(struct task_struct *dead_task)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
@@ -249,13 +250,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childti->pcb.ksp = (unsigned long) childstack;
childti->pcb.flags = 1; /* set FEN, clear everything else */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
childstack->r26 = (unsigned long) ret_from_kernel_thread;
- childstack->r9 = usp; /* function */
- childstack->r10 = kthread_arg;
+ childstack->r9 = (unsigned long) args->fn;
+ childstack->r10 = (unsigned long) args->fn_arg;
childregs->hae = alpha_mv.hae_cache;
childti->pcb.usp = 0;
return 0;
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 5f7f5aab361f..3369f0700702 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -162,10 +162,11 @@ asmlinkage void ret_from_fork(void);
* | user_r25 |
* ------------------ <===== END of PAGE
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *c_regs; /* child's pt_regs */
unsigned long *childksp; /* to unwind out of __switch_to() */
struct callee_regs *c_callee; /* child's callee regs */
@@ -191,11 +192,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childksp[0] = 0; /* fp */
childksp[1] = (unsigned long)ret_from_fork; /* blink */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(c_regs, 0, sizeof(struct pt_regs));
- c_callee->r13 = kthread_arg;
- c_callee->r14 = usp; /* function */
+ c_callee->r13 = (unsigned long)args->fn_arg;
+ c_callee->r14 = (unsigned long)args->fn;
return 0;
}
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0617af11377f..3d9cace63884 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -238,9 +238,11 @@ void release_thread(struct task_struct *dead_task)
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-int copy_thread(unsigned long clone_flags, unsigned long stack_start,
- unsigned long stk_sz, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long stack_start = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
@@ -256,15 +258,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
thread->cpu_domain = get_domain();
#endif
- if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
+ if (likely(!args->fn)) {
*childregs = *current_pt_regs();
childregs->ARM_r0 = 0;
if (stack_start)
childregs->ARM_sp = stack_start;
} else {
memset(childregs, 0, sizeof(struct pt_regs));
- thread->cpu_context.r4 = stk_sz;
- thread->cpu_context.r5 = stack_start;
+ thread->cpu_context.r4 = (unsigned long)args->fn_arg;
+ thread->cpu_context.r5 = (unsigned long)args->fn;
childregs->ARM_cpsr = SVC_MODE;
}
thread->cpu_context.pc = (unsigned long)ret_from_fork;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2f42123e059f..92bcc1768f0b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -343,9 +343,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
-int copy_thread(unsigned long clone_flags, unsigned long stack_start,
- unsigned long stk_sz, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long stack_start = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -361,7 +363,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
ptrauth_thread_init_kernel(p);
- if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
+ if (likely(!args->fn)) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
@@ -399,8 +401,8 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
- p->thread.cpu_context.x19 = stack_start;
- p->thread.cpu_context.x20 = stk_sz;
+ p->thread.cpu_context.x19 = (unsigned long)args->fn;
+ p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 5de04707aa07..eedddb155669 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -29,12 +29,11 @@ asmlinkage void ret_from_kernel_thread(void);
*/
void flush_thread(void){}
-int copy_thread(unsigned long clone_flags,
- unsigned long usp,
- unsigned long kthread_arg,
- struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct switch_stack *childstack;
struct pt_regs *childregs = task_pt_regs(p);
@@ -48,11 +47,11 @@ int copy_thread(unsigned long clone_flags,
/* setup thread.sp for switch_to !!! */
p->thread.sp = (unsigned long)childstack;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(childregs, 0, sizeof(struct pt_regs));
childstack->r15 = (unsigned long) ret_from_kernel_thread;
- childstack->r10 = kthread_arg;
- childstack->r9 = usp;
+ childstack->r10 = (unsigned long) args->fn_arg;
+ childstack->r9 = (unsigned long) args->fn;
childregs->sr = mfcr("psr");
} else {
*childregs = *(current_pt_regs());
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index eab03c691f53..f0552f98a7ba 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -50,9 +50,11 @@ void arch_cpu_idle(void)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *ti = task_thread_info(p);
struct hexagon_switch_stack *ss;
struct pt_regs *childregs;
@@ -73,11 +75,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sizeof(*ss));
ss->lr = (unsigned long)ret_from_fork;
p->thread.switch_sp = ss;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(childregs, 0, sizeof(struct pt_regs));
/* r24 <- fn, r25 <- arg */
- ss->r24 = usp;
- ss->r25 = arg;
+ ss->r24 = (unsigned long)args->fn;
+ ss->r25 = (unsigned long)args->fn_arg;
pt_set_kmode(childregs);
return 0;
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 89025e3b3f61..416305e550e2 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -296,9 +296,12 @@ ia64_load_extra (struct task_struct *task)
* so there is nothing to worry about.
*/
int
-copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
- unsigned long user_stack_size, struct task_struct *p, unsigned long tls)
+copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long user_stack_base = args->stack;
+ unsigned long user_stack_size = args->stack_size;
+ unsigned long tls = args->tls;
extern char ia64_ret_from_clone;
struct switch_stack *child_stack, *stack;
unsigned long rbs, child_rbs, rbs_size;
@@ -339,14 +342,14 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
- if (unlikely(!user_stack_base)) {
+ if (unlikely(args->fn)) {
+ if (unlikely(args->idle)) {
/* fork_idle() called us */
return 0;
}
memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
- child_stack->r4 = user_stack_base; /* payload */
- child_stack->r5 = user_stack_size; /* argument */
+ child_stack->r4 = (unsigned long) args->fn;
+ child_stack->r5 = (unsigned long) args->fn_arg;
/*
* Preserve PSR bits, except for bits 32-34 and 37-45,
* which we can't read.
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index e160a7c57bd3..2cb4a61bcfac 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -137,9 +137,11 @@ asmlinkage int m68k_clone3(struct pt_regs *regs)
return sys_clone3((struct clone_args __user *)regs->d1, regs->d2);
}
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct fork_frame {
struct switch_stack sw;
struct pt_regs regs;
@@ -156,12 +158,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
*/
p->thread.fc = USER_DATA;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(frame, 0, sizeof(struct fork_frame));
frame->regs.sr = PS_S;
- frame->sw.a3 = usp; /* function */
- frame->sw.d7 = arg;
+ frame->sw.a3 = (unsigned long)args->fn;
+ frame->sw.d7 = (unsigned long)args->fn_arg;
frame->sw.retpc = (unsigned long)ret_from_kernel_thread;
p->thread.usp = 0;
return 0;
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 1b944d319d73..3c6241bcaea8 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -52,20 +52,22 @@ void flush_thread(void)
{
}
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
struct thread_info *ti = task_thread_info(p);
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* if we're creating a new kernel thread then just zeroing all
* the registers. That's OK for a brand new thread.*/
memset(childregs, 0, sizeof(struct pt_regs));
memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
ti->cpu_context.r1 = (unsigned long)childregs;
- ti->cpu_context.r20 = (unsigned long)usp; /* fn */
- ti->cpu_context.r19 = (unsigned long)arg;
+ ti->cpu_context.r20 = (unsigned long)args->fn;
+ ti->cpu_context.r19 = (unsigned long)args->fn_arg;
childregs->pt_mode = 1;
local_save_flags(childregs->msr);
ti->cpu_context.msr = childregs->msr & ~MSR_IE;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c2d5f4bfe1f3..35b912bce429 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -105,10 +105,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
unsigned long childksp;
@@ -120,12 +121,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
memset(childregs, 0, sizeof(struct pt_regs));
- p->thread.reg16 = usp; /* fn */
- p->thread.reg17 = kthread_arg;
+ p->thread.reg16 = (unsigned long)args->fn;
+ p->thread.reg17 = (unsigned long)args->fn_arg;
p->thread.reg29 = childksp;
p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
#if defined(CONFIG_CPU_R3000)
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index f8ea522a1588..29593b98567d 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -100,21 +100,23 @@ void flush_thread(void)
{
}
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
struct pt_regs *regs;
struct switch_stack *stack;
struct switch_stack *childstack =
((struct switch_stack *)childregs) - 1;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
- childstack->r16 = usp; /* fn */
- childstack->r17 = arg;
+ childstack->r16 = (unsigned long) args->fn;
+ childstack->r17 = (unsigned long) args->fn_arg;
childstack->ra = (unsigned long) ret_from_kernel_thread;
childregs->estatus = STATUS_PIE;
childregs->sp = (unsigned long) childstack;
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 1d4c0921aafa..52dc983ddeba 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -167,9 +167,11 @@ extern asmlinkage void ret_from_fork(void);
*/
int
-copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *userregs;
struct pt_regs *kregs;
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
@@ -187,10 +189,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *)sp;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(kregs, 0, sizeof(struct pt_regs));
- kregs->gpr[20] = usp; /* fn, kernel thread */
- kregs->gpr[22] = arg;
+ kregs->gpr[20] = (unsigned long)args->fn;
+ kregs->gpr[22] = (unsigned long)args->fn_arg;
} else {
*userregs = *current_pt_regs();
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index d145184696ea..7c37e09c92da 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -206,9 +206,11 @@ arch_initcall(parisc_idle_init);
* Copy architecture-specific thread state
*/
int
-copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
+copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *cregs = &(p->thread.regs);
void *stack = task_stack_page(p);
@@ -218,10 +220,10 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
extern void * const ret_from_kernel_thread;
extern void * const child_return;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(cregs, 0, sizeof(struct pt_regs));
- if (!usp) /* idle thread */
+ if (args->idle) /* idle thread */
return 0;
/* Must exit via ret_from_kernel_thread in order
* to call schedule_tail()
@@ -233,12 +235,12 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
* ret_from_kernel_thread.
*/
#ifdef CONFIG_64BIT
- cregs->gr[27] = ((unsigned long *)usp)[3];
- cregs->gr[26] = ((unsigned long *)usp)[2];
+ cregs->gr[27] = ((unsigned long *)args->fn)[3];
+ cregs->gr[26] = ((unsigned long *)args->fn)[2];
#else
- cregs->gr[26] = usp;
+ cregs->gr[26] = (unsigned long) args->fn;
#endif
- cregs->gr[25] = kthread_arg;
+ cregs->gr[25] = (unsigned long) args->fn_arg;
} else {
/* user thread */
/* usp must be word aligned. This also prevents users from
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d00b20c65966..b62046bf3bb8 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1713,10 +1713,11 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
extern void ret_from_fork_scv(void);
@@ -1733,18 +1734,18 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Copy registers */
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
/* function */
- if (usp)
- childregs->gpr[14] = ppc_function_entry((void *)usp);
+ if (args->fn)
+ childregs->gpr[14] = ppc_function_entry((void *)args->fn);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
childregs->softe = IRQS_ENABLED;
#endif
- childregs->gpr[15] = kthread_arg;
+ childregs->gpr[15] = (unsigned long)args->fn_arg;
p->thread.regs = NULL; /* no user register state */
ti->flags |= _TIF_RESTOREALL;
f = ret_from_kernel_thread;
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 1c7be865ab31..ceb9ebab6558 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -157,13 +157,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
/* p->thread holds context to be restored by __switch_to() */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp_in_global;
@@ -171,8 +173,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
childregs->status = SR_PP | SR_PIE;
p->thread.ra = (unsigned long)ret_from_kernel_thread;
- p->thread.s[0] = usp; /* fn */
- p->thread.s[1] = arg;
+ p->thread.s[0] = (unsigned long)args->fn;
+ p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
*childregs = *(current_pt_regs());
if (usp) /* User fork */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 71d86f73b02c..89949b9f3cf8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -94,9 +94,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
-int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long new_stackp = args->stack;
+ unsigned long tls = args->tls;
struct fake_frame
{
struct stack_frame sf;
@@ -130,15 +132,15 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->sf.gprs[9] = (unsigned long)frame;
/* Store access registers to kernel stack of new process. */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
- frame->childregs.gprs[9] = new_stackp; /* function */
- frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[9] = (unsigned long)args->fn;
+ frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
frame->childregs.orig_gpr2 = -1;
frame->childregs.last_break = 1;
return 0;
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index ca01286a0610..a808843375e7 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -92,9 +92,11 @@ void release_thread(struct task_struct *dead_task)
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
-int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs;
@@ -114,11 +116,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.pc = (unsigned long) ret_from_kernel_thread;
- childregs->regs[4] = arg;
- childregs->regs[5] = usp;
+ childregs->regs[4] = (unsigned long) args->fn_arg;
+ childregs->regs[5] = (unsigned long) args->fn;
childregs->sr = SR_MD;
#if defined(CONFIG_SH_FPU)
childregs->sr |= SR_FD;
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 88c0c14aaff0..33b0215a4182 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -259,9 +259,11 @@ clone_stackframe(struct sparc_stackf __user *dst,
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
-int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long sp = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
char *new_stack;
@@ -296,13 +298,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
ti->ksp = (unsigned long) new_stack;
p->thread.kregs = childregs;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
extern int nwindows;
unsigned long psr;
memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
- childregs->u_regs[UREG_G1] = sp; /* function */
- childregs->u_regs[UREG_G2] = arg;
+ childregs->u_regs[UREG_G1] = (unsigned long) args->fn;
+ childregs->u_regs[UREG_G2] = (unsigned long) args->fn_arg;
psr = childregs->psr = get_psr();
ti->kpsr = psr | PSR_PIL;
ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows);
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 9a2ceb080ac9..6335b698a4b4 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -564,9 +564,11 @@ barf:
* Parent --> %o0 == childs pid, %o1 == 0
* Child --> %o0 == parents pid, %o1 == 1
*/
-int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long sp = args->stack;
+ unsigned long tls = args->tls;
struct thread_info *t = task_thread_info(p);
struct pt_regs *regs = current_pt_regs();
struct sparc_stackf *parent_sf;
@@ -584,12 +586,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
sizeof(struct sparc_stackf));
t->fpsaved[0] = 0;
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
memset(child_trap_frame, 0, child_stack_sz);
__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
(current_pt_regs()->tstate + 1) & TSTATE_CWP;
- t->kregs->u_regs[UREG_G1] = sp; /* function */
- t->kregs->u_regs[UREG_G2] = arg;
+ t->kregs->u_regs[UREG_G1] = (unsigned long) args->fn;
+ t->kregs->u_regs[UREG_G2] = (unsigned long) args->fn_arg;
return 0;
}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 80504680be08..181cc9aafb25 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -154,16 +154,17 @@ void fork_handler(void)
userspace(&current->thread.regs.regs, current_thread_info()->aux_fp_regs);
}
-int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long arg, struct task_struct * p, unsigned long tls)
+int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long sp = args->stack;
+ unsigned long tls = args->tls;
void (*handler)(void);
- int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
int ret = 0;
p->thread = (struct thread_struct) INIT_THREAD;
- if (!kthread) {
+ if (!args->fn) {
memcpy(&p->thread.regs.regs, current_pt_regs(),
sizeof(p->thread.regs.regs));
PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -175,14 +176,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
arch_copy_thread(&current->thread.arch, &p->thread.arch);
} else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
- p->thread.request.u.thread.proc = (int (*)(void *))sp;
- p->thread.request.u.thread.arg = (void *)arg;
+ p->thread.request.u.thread.proc = args->fn;
+ p->thread.request.u.thread.arg = args->fn_arg;
handler = new_thread_handler;
}
new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
- if (!kthread) {
+ if (!args->fn) {
clear_flushed_tls(p);
/*
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index 99a8820e8cc4..b2486b2cbc6e 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -11,7 +11,7 @@
extern void save_fpregs_to_fpstate(struct fpu *fpu);
extern void fpu__drop(struct fpu *fpu);
-extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags);
+extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
extern void fpu_flush_thread(void);
/*
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index b5f0d2ff47e4..c08eb0fdd11f 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -78,13 +78,13 @@ static inline void update_task_stack(struct task_struct *task)
}
static inline void kthread_frame_init(struct inactive_task_frame *frame,
- unsigned long fun, unsigned long arg)
+ int (*fun)(void *), void *arg)
{
- frame->bx = fun;
+ frame->bx = (unsigned long)fun;
#ifdef CONFIG_X86_32
- frame->di = arg;
+ frame->di = (unsigned long)arg;
#else
- frame->r12 = arg;
+ frame->r12 = (unsigned long)arg;
#endif
}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0fdc807ae13f..0531d6a06df5 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -556,7 +556,7 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
}
/* Clone current's FPU state on fork */
-int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
+int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
{
struct fpu *src_fpu = &current->thread.fpu;
struct fpu *dst_fpu = &dst->thread.fpu;
@@ -579,7 +579,7 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
* No FPU state inheritance for kernel threads and IO
* worker threads.
*/
- if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
+ if (minimal) {
/* Clear out the minimal state */
memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
init_fpstate_copy_size());
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58fb48d3004f..9b2772b7e1f3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -131,9 +131,11 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
return do_set_thread_area_64(p, ARCH_SET_FS, tls);
}
-int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
- struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long sp = args->stack;
+ unsigned long tls = args->tls;
struct inactive_task_frame *frame;
struct fork_frame *fork_frame;
struct pt_regs *childregs;
@@ -171,13 +173,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
frame->flags = X86_EFLAGS_FIXED;
#endif
- fpu_clone(p, clone_flags);
+ fpu_clone(p, clone_flags, args->fn);
/* Kernel thread ? */
if (unlikely(p->flags & PF_KTHREAD)) {
p->thread.pkru = pkru_get_init_value();
memset(childregs, 0, sizeof(struct pt_regs));
- kthread_frame_init(frame, sp, arg);
+ kthread_frame_init(frame, args->fn, args->fn_arg);
return 0;
}
@@ -193,10 +195,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
if (sp)
childregs->sp = sp;
- if (unlikely(p->flags & PF_IO_WORKER)) {
+ if (unlikely(args->fn)) {
/*
- * An IO thread is a user space thread, but it doesn't
- * return to ret_after_fork().
+ * A user space thread, but it doesn't return to
+ * ret_after_fork().
*
* In order to indicate that to tools like gdb,
* we reset the stack and instruction pointers.
@@ -206,7 +208,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
*/
childregs->sp = 0;
childregs->ip = 0;
- kthread_frame_init(frame, sp, arg);
+ kthread_frame_init(frame, args->fn, args->fn_arg);
return 0;
}
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 7e38292dd07a..68e0e2f06d66 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -263,10 +263,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
* involved. Much simpler to just not copy those live frames across.
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
- unsigned long thread_fn_arg, struct task_struct *p,
- unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long usp_thread_fn = args->stack;
+ unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
@@ -286,7 +287,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
#error Unsupported Xtensa ABI
#endif
- if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (!args->fn) {
struct pt_regs *regs = current_pt_regs();
unsigned long usp = usp_thread_fn ?
usp_thread_fn : regs->areg[1];
@@ -338,15 +339,15 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
* Window underflow will load registers from the
* spill slots on the stack on return from _switch_to.
*/
- SPILL_SLOT(childregs, 2) = usp_thread_fn;
- SPILL_SLOT(childregs, 3) = thread_fn_arg;
+ SPILL_SLOT(childregs, 2) = (unsigned long)args->fn;
+ SPILL_SLOT(childregs, 3) = (unsigned long)args->fn_arg;
#elif defined(__XTENSA_CALL0_ABI__)
/*
* a12 = thread_fn, a13 = thread_fn arg.
* _switch_to epilogue will load registers from the stack.
*/
- ((unsigned long *)p->thread.sp)[0] = usp_thread_fn;
- ((unsigned long *)p->thread.sp)[1] = thread_fn_arg;
+ ((unsigned long *)p->thread.sp)[0] = (unsigned long)args->fn;
+ ((unsigned long *)p->thread.sp)[1] = (unsigned long)args->fn_arg;
#else
#error Unsupported Xtensa ABI
#endif
diff --git a/fs/exec.c b/fs/exec.c
index 14b4b3755580..0989fb8472a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1312,9 +1312,7 @@ int begin_new_exec(struct linux_binprm * bprm)
if (retval)
goto out_unlock;
- if (me->flags & PF_KTHREAD)
- free_kthread_struct(me);
- me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
+ me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
me->personality &= ~bprm->per_clear;
@@ -1959,6 +1957,10 @@ int kernel_execve(const char *kernel_filename,
int fd = AT_FDCWD;
int retval;
+ /* It is non-sense for kernel threads to call execve */
+ if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
+ return -EINVAL;
+
filename = getname_kernel(kernel_filename);
if (IS_ERR(filename))
return PTR_ERR(filename);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 719c9a6cac8d..505aaf9fe477 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -32,6 +32,10 @@ struct kernel_clone_args {
size_t set_tid_size;
int cgroup;
int io_thread;
+ int kthread;
+ int idle;
+ int (*fn)(void *);
+ void *fn_arg;
struct cgroup *cgrp;
struct css_set *cset;
};
@@ -67,8 +71,7 @@ extern void fork_init(void);
extern void release_task(struct task_struct * p);
-extern int copy_thread(unsigned long, unsigned long, unsigned long,
- struct task_struct *, unsigned long);
+extern int copy_thread(struct task_struct *, const struct kernel_clone_args *);
extern void flush_thread(void);
@@ -89,6 +92,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
int kernel_wait(pid_t pid, int *stat);
diff --git a/init/initramfs.c b/init/initramfs.c
index dc84cf756cea..18229cfe8906 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/init_syscalls.h>
+#include <linux/task_work.h>
#include <linux/umh.h>
static __initdata bool csum_present;
@@ -727,6 +728,7 @@ done:
initrd_end = 0;
flush_delayed_fput();
+ task_work_run();
}
static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
diff --git a/init/main.c b/init/main.c
index 02eb533018f6..0ee39cdcfcac 100644
--- a/init/main.c
+++ b/init/main.c
@@ -688,7 +688,7 @@ noinline void __ref rest_init(void)
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
- pid = kernel_thread(kernel_init, NULL, CLONE_FS);
+ pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
/*
* Pin init on the boot CPU. Task migration is not properly working
* until sched_init_smp() has been run. It will set the allowed
diff --git a/kernel/fork.c b/kernel/fork.c
index 124829ed0163..9d44f2d46c69 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1982,7 +1982,7 @@ static __latent_entropy struct task_struct *copy_process(
struct task_struct *p;
struct multiprocess_signals delayed;
struct file *pidfile = NULL;
- u64 clone_flags = args->flags;
+ const u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;
/*
@@ -2071,6 +2071,9 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ p->flags &= ~PF_KTHREAD;
+ if (args->kthread)
+ p->flags |= PF_KTHREAD;
if (args->io_thread) {
/*
* Mark us an IO worker, and block any signal that isn't
@@ -2160,7 +2163,7 @@ static __latent_entropy struct task_struct *copy_process(
p->io_context = NULL;
audit_set_context(p, NULL);
cgroup_fork(p);
- if (p->flags & PF_KTHREAD) {
+ if (args->kthread) {
if (!set_kthread_struct(p))
goto bad_fork_cleanup_delayacct;
}
@@ -2243,7 +2246,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
- retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
+ retval = copy_thread(p, args);
if (retval)
goto bad_fork_cleanup_io;
@@ -2547,11 +2550,21 @@ static inline void init_idle_pids(struct task_struct *idle)
}
}
+static int idle_dummy(void *dummy)
+{
+ /* This function is never called */
+ return 0;
+}
+
struct task_struct * __init fork_idle(int cpu)
{
struct task_struct *task;
struct kernel_clone_args args = {
- .flags = CLONE_VM,
+ .flags = CLONE_VM,
+ .fn = &idle_dummy,
+ .fn_arg = NULL,
+ .kthread = 1,
+ .idle = 1,
};
task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
@@ -2582,8 +2595,8 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
- .stack = (unsigned long)fn,
- .stack_size = (unsigned long)arg,
+ .fn = fn,
+ .fn_arg = arg,
.io_thread = 1,
};
@@ -2687,8 +2700,25 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
- .stack = (unsigned long)fn,
- .stack_size = (unsigned long)arg,
+ .fn = fn,
+ .fn_arg = arg,
+ .kthread = 1,
+ };
+
+ return kernel_clone(&args);
+}
+
+/*
+ * Create a user mode thread.
+ */
+pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
+ .fn = fn,
+ .fn_arg = arg,
};
return kernel_clone(&args);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c5b74f66bd3..77b2048a9326 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2927,7 +2927,7 @@ static void task_tick_numa(struct rq *rq, struct task_struct *curr)
/*
* We don't care about NUMA placement if we don't have memory.
*/
- if ((curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work)
+ if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work)
return;
/*
diff --git a/kernel/umh.c b/kernel/umh.c
index 36c123360ab8..b989736e8707 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -132,7 +132,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
/* If SIGCLD is ignored do_wait won't populate the status. */
kernel_sigaction(SIGCHLD, SIG_DFL);
- pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
+ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
if (pid < 0)
sub_info->retval = pid;
else
@@ -171,8 +171,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
* want to pollute current->children, and we need a parent
* that always ignores SIGCHLD to ensure auto-reaping.
*/
- pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
- CLONE_PARENT | SIGCHLD);
+ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info,
+ CLONE_PARENT | SIGCHLD);
if (pid < 0) {
sub_info->retval = pid;
umh_complete(sub_info);