From f8f2a8522a88aacd62a310ce49e8dac530d1b403 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 9 Jul 2011 16:09:43 +0100
Subject: ARM: vfp: fix a hole in VFP thread migration

Fix a hole in the VFP thread migration.  Lets define two threads.

Thread 1, we'll call 'interesting_thread' which is a thread which is
running on CPU0, using VFP (so vfp_current_hw_state[0] =
&interesting_thread->vfpstate) and gets migrated off to CPU1, where
it continues execution of VFP instructions.

Thread 2, we'll call 'new_cpu0_thread' which is the thread which takes
over on CPU0.  This has also been using VFP, and last used VFP on CPU0,
but doesn't use it again.

The following code will be executed twice:

		cpu = thread->cpu;

		/*
		 * On SMP, if VFP is enabled, save the old state in
		 * case the thread migrates to a different CPU. The
		 * restoring is done lazily.
		 */
		if ((fpexc & FPEXC_EN) && vfp_current_hw_state[cpu]) {
			vfp_save_state(vfp_current_hw_state[cpu], fpexc);
			vfp_current_hw_state[cpu]->hard.cpu = cpu;
		}
		/*
		 * Thread migration, just force the reloading of the
		 * state on the new CPU in case the VFP registers
		 * contain stale data.
		 */
		if (thread->vfpstate.hard.cpu != cpu)
			vfp_current_hw_state[cpu] = NULL;

The first execution will be on CPU0 to switch away from 'interesting_thread'.
interesting_thread->cpu will be 0.

So, vfp_current_hw_state[0] points at interesting_thread->vfpstate.
The hardware state will be saved, along with the CPU number (0) that
it was executing on.

'thread' will be 'new_cpu0_thread' with new_cpu0_thread->cpu = 0.
Also, because it was executing on CPU0, new_cpu0_thread->vfpstate.hard.cpu = 0,
and so the thread migration check is not triggered.

This means that vfp_current_hw_state[0] remains pointing at interesting_thread.

The second execution will be on CPU1 to switch _to_ 'interesting_thread'.
So, 'thread' will be 'interesting_thread' and interesting_thread->cpu now
will be 1.  The previous thread executing on CPU1 is not relevant to this
so we shall ignore that.

We get to the thread migration check.  Here, we discover that
interesting_thread->vfpstate.hard.cpu = 0, yet interesting_thread->cpu is
now 1, indicating thread migration.  We set vfp_current_hw_state[1] to
NULL.

So, at this point vfp_current_hw_state[] contains the following:

[0] = &interesting_thread->vfpstate
[1] = NULL

Our interesting thread now executes a VFP instruction, takes a fault
which loads the state into the VFP hardware.  Now, through the assembly
we now have:

[0] = &interesting_thread->vfpstate
[1] = &interesting_thread->vfpstate

CPU1 stops due to ptrace (and so saves its VFP state) using the thread
switch code above), and CPU0 calls vfp_sync_hwstate().

	if (vfp_current_hw_state[cpu] == &thread->vfpstate) {
		vfp_save_state(&thread->vfpstate, fpexc | FPEXC_EN);

BANG, we corrupt interesting_thread's VFP state by overwriting the
more up-to-date state saved by CPU1 with the old VFP state from CPU0.

Fix this by ensuring that we have sane semantics for the various state
describing variables:

1. vfp_current_hw_state[] points to the current owner of the context
   information stored in each CPUs hardware, or NULL if that state
   information is invalid.
2. thread->vfpstate.hard.cpu always contains the most recent CPU number
   which the state was loaded into or NR_CPUS if no CPU owns the state.

So, for a particular CPU to be a valid owner of the VFP state for a
particular thread t, two things must be true:

 vfp_current_hw_state[cpu] == &t->vfpstate && t->vfpstate.hard.cpu == cpu.

and that is valid from the moment a CPU loads the saved VFP context
into the hardware.  This gives clear and consistent semantics to
interpreting these variables.

This patch also fixes thread copying, ensuring that t->vfpstate.hard.cpu
is invalidated, otherwise CPU0 may believe it was the last owner.  The
hole can happen thus:

- thread1 runs on CPU2 using VFP, migrates to CPU3, exits and thread_info
  freed.
- New thread allocated from a previously running thread on CPU2, reusing
  memory for thread1 and copying vfp.hard.cpu.

At this point, the following are true:

	new_thread1->vfpstate.hard.cpu == 2
	&new_thread1->vfpstate == vfp_current_hw_state[2]

Lastly, this also addresses thread flushing in a similar way to thread
copying.  Hole is:

- thread runs on CPU0, using VFP, migrates to CPU1 but does not use VFP.
- thread calls execve(), so thread flush happens, leaving
  vfp_current_hw_state[0] intact.  This vfpstate is memset to 0 causing
  thread->vfpstate.hard.cpu = 0.
- thread migrates back to CPU0 before using VFP.

At this point, the following are true:

	thread->vfpstate.hard.cpu == 0
	&thread->vfpstate == vfp_current_hw_state[0]

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/vfp/vfphw.S | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

(limited to 'arch/arm/vfp/vfphw.S')

diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 404538ae591d..2d30c7f6edd3 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -82,19 +82,22 @@ ENTRY(vfp_support_entry)
 	ldr	r4, [r3, r11, lsl #2]	@ vfp_current_hw_state pointer
 	bic	r5, r1, #FPEXC_EX	@ make sure exceptions are disabled
 	cmp	r4, r10			@ this thread owns the hw context?
+#ifndef CONFIG_SMP
+	@ For UP, checking that this thread owns the hw context is
+	@ sufficient to determine that the hardware state is valid.
 	beq	vfp_hw_state_valid
 
+	@ On UP, we lazily save the VFP context.  As a different
+	@ thread wants ownership of the VFP hardware, save the old
+	@ state if there was a previous (valid) owner.
+
 	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
 					@ exceptions, so we can get at the
 					@ rest of it
 
-#ifndef CONFIG_SMP
-	@ Save out the current registers to the old thread state
-	@ No need for SMP since this is not done lazily
-
 	DBGSTR1	"save old state %p", r4
-	cmp	r4, #0
-	beq	no_old_VFP_process
+	cmp	r4, #0			@ if the vfp_current_hw_state is NULL
+	beq	vfp_reload_hw		@ then the hw state needs reloading
 	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
 #ifndef CONFIG_CPU_FEROCEON
@@ -107,11 +110,33 @@ ENTRY(vfp_support_entry)
 1:
 #endif
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
-					@ and point r4 at the word at the
-					@ start of the register dump
+vfp_reload_hw:
+
+#else
+	@ For SMP, if this thread does not own the hw context, then we
+	@ need to reload it.  No need to save the old state as on SMP,
+	@ we always save the state when we switch away from a thread.
+	bne	vfp_reload_hw
+
+	@ This thread has ownership of the current hardware context.
+	@ However, it may have been migrated to another CPU, in which
+	@ case the saved state is newer than the hardware context.
+	@ Check this by looking at the CPU number which the state was
+	@ last loaded onto.
+	ldr	ip, [r10, #VFP_CPU]
+	teq	ip, r11
+	beq	vfp_hw_state_valid
+
+vfp_reload_hw:
+	@ We're loading this threads state into the VFP hardware. Update
+	@ the CPU number which contains the most up to date VFP context.
+	str	r11, [r10, #VFP_CPU]
+
+	VFPFMXR	FPEXC, r5		@ enable VFP, disable any pending
+					@ exceptions, so we can get at the
+					@ rest of it
 #endif
 
-no_old_VFP_process:
 	DBGSTR1	"load state %p", r10
 	str	r10, [r3, r11, lsl #2]	@ update the vfp_current_hw_state pointer
 					@ Load the saved state back into the VFP
-- 
cgit v1.2.3