From 588974857359861891f478a070b1dc7ae04a3880 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:20 -0400 Subject: sparc64: reset mm cpumask after wrap After a wrap (getting a new context version) a process must get a new context id, which means that we would need to flush the context id from the TLB before running for the first time with this ID on every CPU. But, we use mm_cpumask to determine if this process has been running on this CPU before, and this mask is not reset after a wrap. So, there are two possible fixes for this issue: 1. Clear mm cpumask whenever mm gets a new context id 2. Unconditionally flush context every time process is running on a CPU This patch implements the first solution Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7ca1b9dc7d64..f8e30a3906be 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -759,6 +759,8 @@ void get_new_mmu_context(struct mm_struct *mm) goto out; } } + if (mm->context.sparc64_ctx_val) + cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); out: -- cgit v1.2.3 From 14d0334c6748ff2aedb3f2f7fdc51ee90a9b54e7 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:21 -0400 Subject: sparc64: combine activate_mm and switch_mm The only difference between these two functions is that in activate_mm we unconditionally flush context. However, there is no need to keep this difference after fixing a bug where cpumask was not reset on a wrap. So, in this patch we combine these. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 22fede6eba11..734a1343d77d 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -133,26 +133,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str } #define deactivate_mm(tsk,mm) do { } while (0) - -/* Activate a new MM instance for the current task. */ -static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) -{ - unsigned long flags; - int cpu; - - spin_lock_irqsave(&mm->context.lock, flags); - if (!CTX_VALID(mm->context)) - get_new_mmu_context(mm); - cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_set_cpu(cpu, mm_cpumask(mm)); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - tsb_context_switch(mm); - spin_unlock_irqrestore(&mm->context.lock, flags); -} - +#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL) #endif /* !(__ASSEMBLY__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ -- cgit v1.2.3 From c4415235b2be0cc791572e8e7f7466ab8f73a2bf Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:22 -0400 Subject: sparc64: redefine first version CTX_FIRST_VERSION defines the first context version, but also it defines first context. This patch redefines it to only include the first context version. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_64.h | 2 +- arch/sparc/mm/init_64.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index f7de0dbc38af..83b36a5371ff 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -52,7 +52,7 @@ #define CTX_NR_MASK TAG_CONTEXT_BITS #define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK) -#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL)) +#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT) #define CTX_VALID(__ctx) \ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK)) #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f8e30a3906be..63b50447bb62 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -707,7 +707,7 @@ EXPORT_SYMBOL(__flush_dcache_range); /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); -unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; +unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); @@ -738,9 +738,9 @@ void get_new_mmu_context(struct mm_struct *mm) if (new_ctx >= ctx) { int i; new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION; + CTX_FIRST_VERSION + 1; if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION; + new_ctx = CTX_FIRST_VERSION + 1; /* Don't call memset, for 16 entries that's just * plain silly... -- cgit v1.2.3 From 7a5b4bbf49fe86ce77488a70c5dccfe2d50d7a2d Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:23 -0400 Subject: sparc64: add per-cpu mm of secondary contexts The new wrap is going to use information from this array to figure out mm's that currently have valid secondary contexts setup. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 5 +++-- arch/sparc/mm/init_64.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 734a1343d77d..edb45247bfa9 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -19,6 +19,7 @@ extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; +DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); #ifdef CONFIG_SMP void smp_new_mmu_context_version(void); @@ -76,8 +77,9 @@ void __flush_tlb_mm(unsigned long, unsigned long); static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid, flags; - int cpu; + int cpu = smp_processor_id(); + per_cpu(per_cpu_secondary_mm, cpu) = mm; if (unlikely(mm == &init_mm)) return; @@ -123,7 +125,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * for the first time, we must flush that context out of the * local TLB. */ - cpu = smp_processor_id(); if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) { cpumask_set_cpu(cpu, mm_cpumask(mm)); __flush_tlb_mm(CTX_HWBITS(mm->context), diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 63b50447bb62..a4c0bc8af820 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -711,6 +711,7 @@ unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); +DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. -- cgit v1.2.3 From a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:24 -0400 Subject: sparc64: new context wrap The current wrap implementation has a race issue: it is called outside of the ctx_alloc_lock, and also does not wait for all CPUs to complete the wrap. This means that a thread can get a new context with a new version and another thread might still be running with the same context. The problem is especially severe on CPUs with shared TLBs, like sun4v. I used the following test to very quickly reproduce the problem: - start over 8K processes (must be more than context IDs) - write and read values at a memory location in every process. Very quickly memory corruptions start happening, and what we read back does not equal what we wrote. Several approaches were explored before settling on this one: Approach 1: Move smp_new_mmu_context_version() inside ctx_alloc_lock, and wait for every process to complete the wrap. (Note: every CPU must WAIT before leaving smp_new_mmu_context_version_client() until every one arrives). This approach ends up with deadlocks, as some threads own locks which other threads are waiting for, and they never receive softint until these threads exit smp_new_mmu_context_version_client(). Since we do not allow the exit, deadlock happens. Approach 2: Handle wrap right during mondo interrupt. Use etrap/rtrap to enter into into C code, and issue new versions to every CPU. This approach adds some overhead to runtime: in switch_mm() we must add some checks to make sure that versions have not changed due to wrap while we were loading the new secondary context. (could be protected by PSTATE_IE but that degrades performance as on M7 and older CPUs as it takes 50 cycles for each access). Also, we still need a global per-cpu array of MMs to know where we need to load new contexts, otherwise we can change context to a thread that is going way (if we received mondo between switch_mm() and switch_to() time). Finally, there are some issues with window registers in rtrap() when context IDs are changed during CPU mondo time. The approach in this patch is the simplest and has almost no impact on runtime. We use the array with mm's where last secondary contexts were loaded onto CPUs and bump their versions to the new generation without changing context IDs. If a new process comes in to get a context ID, it will go through get_new_mmu_context() because of version mismatch. But the running processes do not need to be interrupted. And wrap is quicker as we do not need to xcall and wait for everyone to receive and complete wrap. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 81 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a4c0bc8af820..3c40ebd50f92 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -713,6 +713,53 @@ unsigned long tlb_context_cache = CTX_FIRST_VERSION; DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; +static void mmu_context_wrap(void) +{ + unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK; + unsigned long new_ver, new_ctx, old_ctx; + struct mm_struct *mm; + int cpu; + + bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS); + + /* Reserve kernel context */ + set_bit(0, mmu_context_bmap); + + new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION; + if (unlikely(new_ver == 0)) + new_ver = CTX_FIRST_VERSION; + tlb_context_cache = new_ver; + + /* + * Make sure that any new mm that are added into per_cpu_secondary_mm, + * are going to go through get_new_mmu_context() path. + */ + mb(); + + /* + * Updated versions to current on those CPUs that had valid secondary + * contexts + */ + for_each_online_cpu(cpu) { + /* + * If a new mm is stored after we took this mm from the array, + * it will go into get_new_mmu_context() path, because we + * already bumped the version in tlb_context_cache. + */ + mm = per_cpu(per_cpu_secondary_mm, cpu); + + if (unlikely(!mm || mm == &init_mm)) + continue; + + old_ctx = mm->context.sparc64_ctx_val; + if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) { + new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver; + set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap); + mm->context.sparc64_ctx_val = new_ctx; + } + } +} + /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. * @@ -727,50 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - int new_version; spin_lock(&ctx_alloc_lock); +retry: + /* wrap might have happened, test again if our context became valid */ + if (unlikely(CTX_VALID(mm->context))) + goto out; orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); - new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { - int i; - new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION + 1; - if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION + 1; - - /* Don't call memset, for 16 entries that's just - * plain silly... - */ - mmu_context_bmap[0] = 3; - mmu_context_bmap[1] = 0; - mmu_context_bmap[2] = 0; - mmu_context_bmap[3] = 0; - for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { - mmu_context_bmap[i + 0] = 0; - mmu_context_bmap[i + 1] = 0; - mmu_context_bmap[i + 2] = 0; - mmu_context_bmap[i + 3] = 0; - } - new_version = 1; - goto out; + mmu_context_wrap(); + goto retry; } } if (mm->context.sparc64_ctx_val) cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); -out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; +out: spin_unlock(&ctx_alloc_lock); - - if (unlikely(new_version)) - smp_new_mmu_context_version(); } static int numa_enabled = 1; -- cgit v1.2.3 From 0197e41ce70511dc3b71f7fefa1a676e2b5cd60b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 31 May 2017 11:25:25 -0400 Subject: sparc64: delete old wrap code The old method that is using xcall and softint to get new context id is deleted, as it is replaced by a method of using per_cpu_secondary_mm without xcall to perform the context wrap. Signed-off-by: Pavel Tatashin Reviewed-by: Bob Picco Reviewed-by: Steven Sistare Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_context_64.h | 6 ------ arch/sparc/include/asm/pil.h | 1 - arch/sparc/kernel/kernel.h | 1 - arch/sparc/kernel/smp_64.c | 31 ------------------------------- arch/sparc/kernel/ttable_64.S | 2 +- arch/sparc/mm/ultra.S | 5 ----- 6 files changed, 1 insertion(+), 45 deletions(-) diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index edb45247bfa9..2cddcda4f85f 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -21,12 +21,6 @@ extern unsigned long mmu_context_bmap[]; DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); -#ifdef CONFIG_SMP -void smp_new_mmu_context_version(void); -#else -#define smp_new_mmu_context_version() do { } while (0) -#endif - int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index 266937030546..522b43db2ed3 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -20,7 +20,6 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 -#define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 #define PIL_DEFERRED_PCR_WORK 7 diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index c9804551262c..6ae1e77be0bf 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr /* smp_64.c */ void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs); void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs); -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs); void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs); void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index b3bc0ac757cc..fdf31040a7dc 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) preempt_enable(); } -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long flags; - - clear_softint(1 << irq); - - /* See if we need to allocate a new TLB context because - * the version of the one we are using is now out of date. - */ - mm = current->active_mm; - if (unlikely(!mm || (mm == &init_mm))) - return; - - spin_lock_irqsave(&mm->context.lock, flags); - - if (unlikely(!CTX_VALID(mm->context))) - get_new_mmu_context(mm); - - spin_unlock_irqrestore(&mm->context.lock, flags); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), - SECONDARY_CONTEXT); -} - -void smp_new_mmu_context_version(void) -{ - smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); -} - #ifdef CONFIG_KGDB void kgdb_roundup_cpus(unsigned long flags) { diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S index 7bd8f6556352..efe93ab4a9c0 100644 --- a/arch/sparc/kernel/ttable_64.S +++ b/arch/sparc/kernel/ttable_64.S @@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) -tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) +tl0_irq4: BTRAP(0x44) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 5d2fd6cd3189..fcf4d27a38fb 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -971,11 +971,6 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry - .globl xcall_new_mmu_context_version -xcall_new_mmu_context_version: - wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint - retry - #ifdef CONFIG_KGDB .globl xcall_kgdb_capture xcall_kgdb_capture: -- cgit v1.2.3