1 files changed, 22 insertions, 57 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b137fd63f5e1..5d90ee9aebf1 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -839,43 +839,29 @@ void smp_flush_tlb_all(void)
  *    questionable (in theory the big win for threads is the massive sharing of
  *    address space state across processors).
  */
+
+/* This is currently used only by the hugetlb arch pre-fault
+ * hook on UltraSPARC-III+ and later, when changing the page size
+ * bits of the context register for an address space.
+ */
 void smp_flush_tlb_mm(struct mm_struct *mm)
 {
-        /*
-         * This code is called from two places, dup_mmap and exit_mmap. In the
-         * former case, we really need a flush. In the later case, the callers
-         * are single threaded exec_mmap (really need a flush), multithreaded
-         * exec_mmap case (do not need to flush, since the caller gets a new
-         * context via activate_mm), and all other callers of mmput() whence
-         * the flush can be optimized since the associated threads are dead and
-         * the mm is being torn down (__exit_mm and other mmput callers) or the
-         * owning thread is dissociating itself from the mm. The
-         * (atomic_read(&mm->mm_users) == 0) check ensures real work is done
-         * for single thread exec and dup_mmap cases. An alternate check might
-         * have been (current->mm != mm).
-         *                                              Kanoj Sarcar
-         */
-        if (atomic_read(&mm->mm_users) == 0)
-                return;
-
-	{
-		u32 ctx = CTX_HWBITS(mm->context);
-		int cpu = get_cpu();
+	u32 ctx = CTX_HWBITS(mm->context);
+	int cpu = get_cpu();
 
-		if (atomic_read(&mm->mm_users) == 1) {
-			mm->cpu_vm_mask = cpumask_of_cpu(cpu);
-			goto local_flush_and_out;
-		}
+	if (atomic_read(&mm->mm_users) == 1) {
+		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
+		goto local_flush_and_out;
+	}
 
-		smp_cross_call_masked(&xcall_flush_tlb_mm,
-				      ctx, 0, 0,
-				      mm->cpu_vm_mask);
+	smp_cross_call_masked(&xcall_flush_tlb_mm,
+			      ctx, 0, 0,
+			      mm->cpu_vm_mask);
 
-	local_flush_and_out:
-		__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
+local_flush_and_out:
+	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
 
-		put_cpu();
-	}
+	put_cpu();
 }
 
 void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
@@ -883,34 +869,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long
 	u32 ctx = CTX_HWBITS(mm->context);
 	int cpu = get_cpu();
 
-	if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) {
+	if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
 		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
-		goto local_flush_and_out;
-	} else {
-		/* This optimization is not valid.  Normally
-		 * we will be holding the page_table_lock, but
-		 * there is an exception which is copy_page_range()
-		 * when forking.  The lock is held during the individual
-		 * page table updates in the parent, but not at the
-		 * top level, which is where we are invoked.
-		 */
-		if (0) {
-			cpumask_t this_cpu_mask = cpumask_of_cpu(cpu);
-
-			/* By virtue of running under the mm->page_table_lock,
-			 * and mmu_context.h:switch_mm doing the same, the
-			 * following operation is safe.
-			 */
-			if (cpus_equal(mm->cpu_vm_mask, this_cpu_mask))
-				goto local_flush_and_out;
-		}
-	}
-
-	smp_cross_call_masked(&xcall_flush_tlb_pending,
-			      ctx, nr, (unsigned long) vaddrs,
-			      mm->cpu_vm_mask);
+	else
+		smp_cross_call_masked(&xcall_flush_tlb_pending,
+				      ctx, nr, (unsigned long) vaddrs,
+				      mm->cpu_vm_mask);
 
-local_flush_and_out:
 	__flush_tlb_pending(ctx, nr, vaddrs);
 
 	put_cpu();