From 993ff6d9df74305bc7b5bbc7a0810cf599b6394c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 7 Aug 2018 23:21:56 +1000
Subject: powerpc/64s: Fix PACA_IRQ_HARD_DIS accounting in idle_power4()

When idle_power4() hard disables interrupts then finds a soft pending
interrupt, it returns with interrupts hard disabled but without
PACA_IRQ_HARD_DIS set. Commit 9b81c0211c ("powerpc/64s: make
PACA_IRQ_HARD_DIS track MSR[EE] closely") added a warning for that
condition (since disabled).

Fix this by adding the PACA_IRQ_HARD_DIS for that case.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/idle_power4.S | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index dd7471fe20bd..a09b3c7ca176 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -32,6 +32,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
 	cmpwi	0,r4,0
 	beqlr
 
+	/* This sequence is similar to prep_irq_for_idle() */
+
 	/* Hard disable interrupts */
 	mfmsr	r7
 	rldicl	r0,r7,48,1
@@ -41,10 +43,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
 	/* Check if something happened while soft-disabled */
 	lbz	r0,PACAIRQHAPPENED(r13)
 	cmpwi	cr0,r0,0
-	bnelr
+	bne-	2f
 
-	/* Soft-enable interrupts */
+	/*
+	 * Soft-enable interrupts. This will make power4_fixup_nap return
+	 * to our caller with interrupts enabled (soft and hard). The caller
+	 * can cope with either interrupts disabled or enabled upon return.
+	 */
 #ifdef CONFIG_TRACE_IRQFLAGS
+	/* Tell the tracer interrupts are on, because idle responds to them. */
 	mflr	r0
 	std	r0,16(r1)
 	stdu    r1,-128(r1)
@@ -73,3 +80,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	isync
 	b	1b
 
+2:	/* Return if an interrupt had happened while soft disabled */
+	/* Set the HARD_DIS flag because interrupts are now hard disabled */
+	ori	r0,r0,PACA_IRQ_HARD_DIS
+	stb	r0,PACAIRQHAPPENED(r13)
+	blr
-- 
cgit v1.2.3


From 997dd26cb3c8b7c9b8765751cc1491ad33b2024f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 16 Aug 2018 15:27:47 +1000
Subject: powerpc/traps: Avoid rate limit messages from show unhandled signals

In the recent commit to add an explicit ratelimit state when showing
unhandled signals, commit 35a52a10c3ac ("powerpc/traps: Use an
explicit ratelimit state for show_signal_msg()"), I put the check of
show_unhandled_signals and the ratelimit state before the call to
unhandled_signal() so as to avoid unnecessarily calling the latter
when show_unhandled_signals is false.

However that causes us to check the ratelimit state on every call, so
if we take a lot of *handled* signals that has the effect of making
the ratelimit code print warnings that callbacks have been suppressed
when they haven't.

So rearrange the code so that we check show_unhandled_signals first,
then call unhandled_signal() and finally check the ratelimit state.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
---
 arch/powerpc/kernel/traps.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 070e96f1773a..c85adb858271 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -315,22 +315,21 @@ void user_single_step_siginfo(struct task_struct *tsk,
 	info->si_addr = (void __user *)regs->nip;
 }
 
-static bool show_unhandled_signals_ratelimited(void)
+static void show_signal_msg(int signr, struct pt_regs *regs, int code,
+			    unsigned long addr)
 {
 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
-	return show_unhandled_signals && __ratelimit(&rs);
-}
 
-static void show_signal_msg(int signr, struct pt_regs *regs, int code,
-			    unsigned long addr)
-{
-	if (!show_unhandled_signals_ratelimited())
+	if (!show_unhandled_signals)
 		return;
 
 	if (!unhandled_signal(current, signr))
 		return;
 
+	if (!__ratelimit(&rs))
+		return;
+
 	pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
 		current->comm, current->pid, signame(signr), signr,
 		addr, regs->nip, regs->link, code);
-- 
cgit v1.2.3


From a58183138cb72059a0c278f8370a47f41dae9afa Mon Sep 17 00:00:00 2001
From: Hari Bathini <hbathini@linux.ibm.com>
Date: Sat, 18 Aug 2018 02:10:56 +0530
Subject: powerpc/fadump: cleanup crash memory ranges support

Commit 1bd6a1c4b80a ("powerpc/fadump: handle crash memory ranges array
index overflow") changed crash memory ranges to a dynamic array that
is reallocated on-demand with krealloc(). The relevant header for this
call was not included. The kernel compiles though. But be cautious and
add the header anyway.

Also, memory allocation logic in fadump_add_crash_memory() takes care
of memory allocation for crash memory ranges in all scenarios. Drop
unnecessary memory allocation in fadump_setup_crash_memory_ranges().

Fixes: 1bd6a1c4b80a ("powerpc/fadump: handle crash memory ranges array index overflow")
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/fadump.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 986ec476fd5d..a711d22339ea 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -34,6 +34,7 @@
 #include <linux/crash_dump.h>
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
+#include <linux/slab.h>
 
 #include <asm/debugfs.h>
 #include <asm/page.h>
@@ -1019,13 +1020,6 @@ static int fadump_setup_crash_memory_ranges(void)
 	pr_debug("Setup crash memory ranges.\n");
 	crash_mem_ranges = 0;
 
-	/* allocate memory for crash memory ranges for the first time */
-	if (!max_crash_mem_ranges) {
-		ret = allocate_crash_memory_ranges();
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * add the first memory chunk (RMA_START through boot_memory_size) as
 	 * a separate memory chunk. The reason is, at the time crash firmware
-- 
cgit v1.2.3


From 2ea62630681027c455117aa471ea3ab8bb099ead Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Fri, 17 Aug 2018 20:24:39 +0530
Subject: powerpc/topology: Get topology for shared processors at boot

On a shared LPAR, Phyp will not update the CPU associativity at boot
time. Just after the boot system does recognize itself as a shared
LPAR and trigger a request for correct CPU associativity. But by then
the scheduler would have already created/destroyed its sched domains.

This causes
  - Broken load balance across Nodes causing islands of cores.
  - Performance degradation esp if the system is lightly loaded
  - dmesg to wrongly report all CPUs to be in Node 0.
  - Messages in dmesg saying borken topology.
  - With commit 051f3ca02e46 ("sched/topology: Introduce NUMA identity
    node sched domain"), can cause rcu stalls at boot up.

The sched_domains_numa_masks table which is used to generate cpumasks
is only created at boot time just before creating sched domains and
never updated. Hence, its better to get the topology correct before
the sched domains are created.

For example on 64 core Power 8 shared LPAR, dmesg reports

  Brought up 512 CPUs
  Node 0 CPUs: 0-511
  Node 1 CPUs:
  Node 2 CPUs:
  Node 3 CPUs:
  Node 4 CPUs:
  Node 5 CPUs:
  Node 6 CPUs:
  Node 7 CPUs:
  Node 8 CPUs:
  Node 9 CPUs:
  Node 10 CPUs:
  Node 11 CPUs:
  ...
  BUG: arch topology borken
       the DIE domain not a subset of the NUMA domain
  BUG: arch topology borken
       the DIE domain not a subset of the NUMA domain

numactl/lscpu output will still be correct with cores spreading across
all nodes:

  Socket(s):             64
  NUMA node(s):          12
  Model:                 2.0 (pvr 004d 0200)
  Model name:            POWER8 (architected), altivec supported
  Hypervisor vendor:     pHyp
  Virtualization type:   para
  L1d cache:             64K
  L1i cache:             32K
  NUMA node0 CPU(s): 0-7,32-39,64-71,96-103,176-183,272-279,368-375,464-471
  NUMA node1 CPU(s): 8-15,40-47,72-79,104-111,184-191,280-287,376-383,472-479
  NUMA node2 CPU(s): 16-23,48-55,80-87,112-119,192-199,288-295,384-391,480-487
  NUMA node3 CPU(s): 24-31,56-63,88-95,120-127,200-207,296-303,392-399,488-495
  NUMA node4 CPU(s):     208-215,304-311,400-407,496-503
  NUMA node5 CPU(s):     168-175,264-271,360-367,456-463
  NUMA node6 CPU(s):     128-135,224-231,320-327,416-423
  NUMA node7 CPU(s):     136-143,232-239,328-335,424-431
  NUMA node8 CPU(s):     216-223,312-319,408-415,504-511
  NUMA node9 CPU(s):     144-151,240-247,336-343,432-439
  NUMA node10 CPU(s):    152-159,248-255,344-351,440-447
  NUMA node11 CPU(s):    160-167,256-263,352-359,448-455

Currently on this LPAR, the scheduler detects 2 levels of Numa and
created numa sched domains for all CPUs, but it finds a single DIE
domain consisting of all CPUs. Hence it deletes all numa sched
domains.

To address this, detect the shared processor and update topology soon
after CPUs are setup so that correct topology is updated just before
scheduler creates sched domain.

With the fix, dmesg reports:

  numa: Node 0 CPUs: 0-7 32-39 64-71 96-103 176-183 272-279 368-375 464-471
  numa: Node 1 CPUs: 8-15 40-47 72-79 104-111 184-191 280-287 376-383 472-479
  numa: Node 2 CPUs: 16-23 48-55 80-87 112-119 192-199 288-295 384-391 480-487
  numa: Node 3 CPUs: 24-31 56-63 88-95 120-127 200-207 296-303 392-399 488-495
  numa: Node 4 CPUs: 208-215 304-311 400-407 496-503
  numa: Node 5 CPUs: 168-175 264-271 360-367 456-463
  numa: Node 6 CPUs: 128-135 224-231 320-327 416-423
  numa: Node 7 CPUs: 136-143 232-239 328-335 424-431
  numa: Node 8 CPUs: 216-223 312-319 408-415 504-511
  numa: Node 9 CPUs: 144-151 240-247 336-343 432-439
  numa: Node 10 CPUs: 152-159 248-255 344-351 440-447
  numa: Node 11 CPUs: 160-167 256-263 352-359 448-455

and lscpu also reports:

  Socket(s):             64
  NUMA node(s):          12
  Model:                 2.0 (pvr 004d 0200)
  Model name:            POWER8 (architected), altivec supported
  Hypervisor vendor:     pHyp
  Virtualization type:   para
  L1d cache:             64K
  L1i cache:             32K
  NUMA node0 CPU(s): 0-7,32-39,64-71,96-103,176-183,272-279,368-375,464-471
  NUMA node1 CPU(s): 8-15,40-47,72-79,104-111,184-191,280-287,376-383,472-479
  NUMA node2 CPU(s): 16-23,48-55,80-87,112-119,192-199,288-295,384-391,480-487
  NUMA node3 CPU(s): 24-31,56-63,88-95,120-127,200-207,296-303,392-399,488-495
  NUMA node4 CPU(s):     208-215,304-311,400-407,496-503
  NUMA node5 CPU(s):     168-175,264-271,360-367,456-463
  NUMA node6 CPU(s):     128-135,224-231,320-327,416-423
  NUMA node7 CPU(s):     136-143,232-239,328-335,424-431
  NUMA node8 CPU(s):     216-223,312-319,408-415,504-511
  NUMA node9 CPU(s):     144-151,240-247,336-343,432-439
  NUMA node10 CPU(s):    152-159,248-255,344-351,440-447
  NUMA node11 CPU(s):    160-167,256-263,352-359,448-455

Reported-by: Manjunatha H R <manjuhr1@in.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
[mpe: Trim / format change log]
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/topology.h |  5 +++++
 arch/powerpc/kernel/smp.c           |  5 +++++
 arch/powerpc/mm/numa.c              | 20 ++++++++++----------
 3 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc/kernel')

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 16b077801a5f..a4a718dbfec6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -92,6 +92,7 @@ extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
 extern int find_and_online_cpu_nid(int cpu);
 extern int timed_topology_update(int nsecs);
+extern void __init shared_proc_topology_init(void);
 #else
 static inline int start_topology_update(void)
 {
@@ -113,6 +114,10 @@ static inline int timed_topology_update(int nsecs)
 {
 	return 0;
 }
+
+#ifdef CONFIG_SMP
+static inline void shared_proc_topology_init(void) {}
+#endif
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #include <asm-generic/topology.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index b19d832ef386..61c1fadbc644 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1160,6 +1160,11 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	if (smp_ops && smp_ops->bringup_done)
 		smp_ops->bringup_done();
 
+	/*
+	 * On a shared LPAR, associativity needs to be requested.
+	 * Hence, get numa topology before dumping cpu topology
+	 */
+	shared_proc_topology_init();
 	dump_numa_cpu_topology();
 
 	/*
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0c7e05d89244..35ac5422903a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1078,7 +1078,6 @@ static int prrn_enabled;
 static void reset_topology_timer(void);
 static int topology_timer_secs = 1;
 static int topology_inited;
-static int topology_update_needed;
 
 /*
  * Change polling interval for associativity changes.
@@ -1306,11 +1305,8 @@ int numa_update_cpu_topology(bool cpus_locked)
 	struct device *dev;
 	int weight, new_nid, i = 0;
 
-	if (!prrn_enabled && !vphn_enabled) {
-		if (!topology_inited)
-			topology_update_needed = 1;
+	if (!prrn_enabled && !vphn_enabled && topology_inited)
 		return 0;
-	}
 
 	weight = cpumask_weight(&cpu_associativity_changes_mask);
 	if (!weight)
@@ -1423,7 +1419,6 @@ int numa_update_cpu_topology(bool cpus_locked)
 
 out:
 	kfree(updates);
-	topology_update_needed = 0;
 	return changed;
 }
 
@@ -1551,6 +1546,15 @@ int prrn_is_enabled(void)
 	return prrn_enabled;
 }
 
+void __init shared_proc_topology_init(void)
+{
+	if (lppaca_shared_proc(get_lppaca())) {
+		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+			    nr_cpumask_bits);
+		numa_update_cpu_topology(false);
+	}
+}
+
 static int topology_read(struct seq_file *file, void *v)
 {
 	if (vphn_enabled || prrn_enabled)
@@ -1608,10 +1612,6 @@ static int topology_update_init(void)
 		return -ENOMEM;
 
 	topology_inited = 1;
-	if (topology_update_needed)
-		bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
-					nr_cpumask_bits);
-
 	return 0;
 }
 device_initcall(topology_update_init);
-- 
cgit v1.2.3