From b5b4453e7912f056da1ca7572574cada32ecb60c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 14 Mar 2019 00:14:38 +1100
Subject: powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038

Jakub Drnec reported:
  Setting the realtime clock can sometimes make the monotonic clock go
  back by over a hundred years. Decreasing the realtime clock across
  the y2k38 threshold is one reliable way to reproduce. Allegedly this
  can also happen just by running ntpd, I have not managed to
  reproduce that other than booting with rtc at >2038 and then running
  ntp. When this happens, anything with timers (e.g. openjdk) breaks
  rather badly.

And included a test case (slightly edited for brevity):
  #define _POSIX_C_SOURCE 199309L
  #include <stdio.h>
  #include <time.h>
  #include <stdlib.h>
  #include <unistd.h>

  long get_time(void) {
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    return tp.tv_sec + tp.tv_nsec / 1000000000;
  }

  int main(void) {
    long last = get_time();
    while(1) {
      long now = get_time();
      if (now < last) {
        printf("clock went backwards by %ld seconds!\n", last - now);
      }
      last = now;
      sleep(1);
    }
    return 0;
  }

Which when run concurrently with:
 # date -s 2040-1-1
 # date -s 2037-1-1

Will detect the clock going backward.

The root cause is that wtom_clock_sec in struct vdso_data is only a
32-bit signed value, even though we set its value to be equal to
tk->wall_to_monotonic.tv_sec which is 64-bits.

Because the monotonic clock starts at zero when the system boots the
wall_to_montonic.tv_sec offset is negative for current and future
dates. Currently on a freshly booted system the offset will be in the
vicinity of negative 1.5 billion seconds.

However if the wall clock is set past the Y2038 boundary, the offset
from wall to monotonic becomes less than negative 2^31, and no longer
fits in 32-bits. When that value is assigned to wtom_clock_sec it is
truncated and becomes positive, causing the VDSO assembly code to
calculate CLOCK_MONOTONIC incorrectly.

That causes CLOCK_MONOTONIC to jump ahead by ~4 billion seconds which
it is not meant to do. Worse, if the time is then set back before the
Y2038 boundary CLOCK_MONOTONIC will jump backward.

We can fix it simply by storing the full 64-bit offset in the
vdso_data, and using that in the VDSO assembly code. We also shuffle
some of the fields in vdso_data to avoid creating a hole.

The original commit that added the CLOCK_MONOTONIC support to the VDSO
did actually use a 64-bit value for wtom_clock_sec, see commit
a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support to
32 bits kernel") (Nov 2005). However just 3 days later it was
converted to 32-bits in commit 0c37ec2aa88b ("[PATCH] powerpc: vdso
fixes (take #2)"), and the bug has existed since then AFAICS.

Fixes: 0c37ec2aa88b ("[PATCH] powerpc: vdso fixes (take #2)")
Cc: stable@vger.kernel.org # v2.6.15+
Link: http://lkml.kernel.org/r/HaC.ZfES.62bwlnvAvMP.1STMMj@seznam.cz
Reported-by: Jakub Drnec <jaydee@email.cz>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/vdso_datapage.h  | 8 ++++----
 arch/powerpc/kernel/vdso64/gettimeofday.S | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index 1afe90ade595..bbc06bd72b1f 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -82,10 +82,10 @@ struct vdso_data {
 	__u32 icache_block_size;		/* L1 i-cache block size     */
 	__u32 dcache_log_block_size;		/* L1 d-cache log block size */
 	__u32 icache_log_block_size;		/* L1 i-cache log block size */
-	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
-	__s32 wtom_clock_nsec;
-	struct timespec stamp_xtime;	/* xtime as at tb_orig_stamp */
-	__u32 stamp_sec_fraction;	/* fractional seconds of stamp_xtime */
+	__u32 stamp_sec_fraction;		/* fractional seconds of stamp_xtime */
+	__s32 wtom_clock_nsec;			/* Wall to monotonic clock nsec */
+	__s64 wtom_clock_sec;			/* Wall to monotonic clock sec */
+	struct timespec stamp_xtime;		/* xtime as at tb_orig_stamp */
    	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
    	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
 };
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index a4ed9edfd5f0..1f324c28705b 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -92,7 +92,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	 * At this point, r4,r5 contain our sec/nsec values.
 	 */
 
-	lwa	r6,WTOM_CLOCK_SEC(r3)
+	ld	r6,WTOM_CLOCK_SEC(r3)
 	lwa	r9,WTOM_CLOCK_NSEC(r3)
 
 	/* We now have our result in r6,r9. We create a fake dependency
@@ -125,7 +125,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
 	bne     cr6,75f
 
 	/* CLOCK_MONOTONIC_COARSE */
-	lwa     r6,WTOM_CLOCK_SEC(r3)
+	ld	r6,WTOM_CLOCK_SEC(r3)
 	lwa     r9,WTOM_CLOCK_NSEC(r3)
 
 	/* check if counter has updated */
-- 
cgit v1.2.3


From 4622a2d43101ea2e3d54a2af090f25a5886c648b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Mon, 11 Mar 2019 08:30:27 +0000
Subject: powerpc/6xx: fix setup and use of SPRN_SPRG_PGDIR for hash32

Not only the 603 but all 6xx need SPRN_SPRG_PGDIR to be initialised at
startup. This patch move it from __setup_cpu_603() to start_here()
and __secondary_start(), close to the initialisation of SPRN_THREAD.

Previously, virt addr of PGDIR was retrieved from thread struct.
Now that it is the phys addr which is stored in SPRN_SPRG_PGDIR,
hash_page() shall not convert it to phys anymore.
This patch removes the conversion.

Fixes: 93c4a162b014 ("powerpc/6xx: Store PGDIR physical address in a SPRG")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/cpu_setup_6xx.S | 3 ---
 arch/powerpc/kernel/head_32.S       | 6 ++++++
 arch/powerpc/mm/hash_low_32.S       | 8 ++++----
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 6f1c11e0691f..7534ecff5e92 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -24,9 +24,6 @@ BEGIN_MMU_FTR_SECTION
 	li	r10,0
 	mtspr	SPRN_SPRG_603_LRU,r10		/* init SW LRU tracking */
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
-	lis	r10, (swapper_pg_dir - PAGE_OFFSET)@h
-	ori	r10, r10, (swapper_pg_dir - PAGE_OFFSET)@l
-	mtspr	SPRN_SPRG_PGDIR, r10
 
 BEGIN_FTR_SECTION
 	bl	__init_fpu_registers
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index ce6a972f2584..48051c8977c5 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -855,6 +855,9 @@ __secondary_start:
 	li	r3,0
 	stw	r3, RTAS_SP(r4)		/* 0 => not in RTAS */
 #endif
+	lis	r4, (swapper_pg_dir - PAGE_OFFSET)@h
+	ori	r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+	mtspr	SPRN_SPRG_PGDIR, r4
 
 	/* enable MMU and jump to start_secondary */
 	li	r4,MSR_KERNEL
@@ -942,6 +945,9 @@ start_here:
 	li	r3,0
 	stw	r3, RTAS_SP(r4)		/* 0 => not in RTAS */
 #endif
+	lis	r4, (swapper_pg_dir - PAGE_OFFSET)@h
+	ori	r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+	mtspr	SPRN_SPRG_PGDIR, r4
 
 	/* stack */
 	lis	r1,init_thread_union@ha
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 1f13494efb2b..a6c491f18a04 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -70,12 +70,12 @@ _GLOBAL(hash_page)
 	lis	r0,KERNELBASE@h		/* check if kernel address */
 	cmplw	0,r4,r0
 	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
-	mfspr	r5, SPRN_SPRG_PGDIR	/* virt page-table root */
+	mfspr	r5, SPRN_SPRG_PGDIR	/* phys page-table root */
 	blt+	112f			/* assume user more likely */
-	lis	r5,swapper_pg_dir@ha	/* if kernel address, use */
-	addi	r5,r5,swapper_pg_dir@l	/* kernel page table */
+	lis	r5, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	addi	r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 	rlwimi	r3,r9,32-12,29,29	/* MSR_PR -> _PAGE_USER */
-112:	tophys(r5, r5)
+112:
 #ifndef CONFIG_PTE_64BIT
 	rlwimi	r5,r4,12,20,29		/* insert top 10 bits of address */
 	lwz	r8,0(r5)		/* get pmd entry */
-- 
cgit v1.2.3


From 8bc086899816214fbc6047c9c7e15fcab49552bf Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 17 Mar 2019 01:17:56 +0000
Subject: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations

MAX_PHYSMEM_BITS only needs to be defined if CONFIG_SPARSEMEM is
enabled, and that was the case before commit 4ffe713b7587
("powerpc/mm: Increase the max addressable memory to 2PB").

On 32-bit systems, where CONFIG_SPARSEMEM is not enabled, we now
define it as 46.  That is larger than the real number of physical
address bits, and breaks calculations in zsmalloc:

  mm/zsmalloc.c:130:49: warning: right shift count is negative
    MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
                                                   ^~
  ...
  mm/zsmalloc.c:253:21: error: variably modified 'size_class' at file scope
    struct size_class *size_class[ZS_SIZE_CLASSES];
                       ^~~~~~~~~~

Fixes: 4ffe713b7587 ("powerpc/mm: Increase the max addressable memory to 2PB")
Cc: stable@vger.kernel.org # v4.20+
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index d34ad1657d7b..598cdcdd1355 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void)
 #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) &&	\
 	defined (CONFIG_PPC_64K_PAGES)
 #define MAX_PHYSMEM_BITS        51
-#else
+#elif defined(CONFIG_SPARSEMEM)
 #define MAX_PHYSMEM_BITS        46
 #endif
 
-- 
cgit v1.2.3


From 92edf8df0ff2ae86cc632eeca0e651fd8431d40d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 21 Mar 2019 15:24:33 +1100
Subject: powerpc/security: Fix spectre_v2 reporting

When I updated the spectre_v2 reporting to handle software count cache
flush I got the logic wrong when there's no software count cache
enabled at all.

The result is that on systems with the software count cache flush
disabled we print:

  Mitigation: Indirect branch cache disabled, Software count cache flush

Which correctly indicates that the count cache is disabled, but
incorrectly says the software count cache flush is enabled.

The root of the problem is that we are trying to handle all
combinations of options. But we know now that we only expect to see
the software count cache flush enabled if the other options are false.

So split the two cases, which simplifies the logic and fixes the bug.
We were also missing a space before "(hardware accelerated)".

The result is we see one of:

  Mitigation: Indirect branch serialisation (kernel only)
  Mitigation: Indirect branch cache disabled
  Mitigation: Software count cache flush
  Mitigation: Software count cache flush (hardware accelerated)

Fixes: ee13cb249fab ("powerpc/64s: Add support for software count cache flush")
Cc: stable@vger.kernel.org # v4.19+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Diana Craciun <diana.craciun@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/security.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 9b8631533e02..b33bafb8fcea 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -190,29 +190,22 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
 	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
 
-	if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
-		bool comma = false;
+	if (bcs || ccd) {
 		seq_buf_printf(&s, "Mitigation: ");
 
-		if (bcs) {
+		if (bcs)
 			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
-			comma = true;
-		}
 
-		if (ccd) {
-			if (comma)
-				seq_buf_printf(&s, ", ");
-			seq_buf_printf(&s, "Indirect branch cache disabled");
-			comma = true;
-		}
-
-		if (comma)
+		if (bcs && ccd)
 			seq_buf_printf(&s, ", ");
 
-		seq_buf_printf(&s, "Software count cache flush");
+		if (ccd)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+	} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+		seq_buf_printf(&s, "Mitigation: Software count cache flush");
 
 		if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
-			seq_buf_printf(&s, "(hardware accelerated)");
+			seq_buf_printf(&s, " (hardware accelerated)");
 	} else if (btb_flush_enabled) {
 		seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
 	} else {
-- 
cgit v1.2.3