From ff69a4c855066592f9e293cff8f54813614dd544 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 26 Jul 2013 14:55:59 +0100
Subject: ARM: constify machine_desc structure uses

struct machine_desc records are defined everywhere as 'const'
structures, but unfortunately that const-ness is lost through the use
of linker magic - the symbols which surround the section are not
declared const, so it becomes possible to refer to these const
structures through non-const pointers.

Let's fix this oversight - all pointers to these structures should be
marked const too.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/init.c  | 5 +++--
 arch/arm/mm/mmu.c   | 4 ++--
 arch/arm/mm/nommu.c | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 15225d829d71..2958e74fc42c 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -231,7 +231,7 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,
 }
 #endif
 
-void __init setup_dma_zone(struct machine_desc *mdesc)
+void __init setup_dma_zone(const struct machine_desc *mdesc)
 {
 #ifdef CONFIG_ZONE_DMA
 	if (mdesc->dma_zone_size) {
@@ -335,7 +335,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 	return phys;
 }
 
-void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
+void __init arm_memblock_init(struct meminfo *mi,
+	const struct machine_desc *mdesc)
 {
 	int i;
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4f56617a2392..56054ac8348e 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1151,7 +1151,7 @@ void __init arm_mm_memblock_reserve(void)
  * called function.  This means you can't use any function or debugging
  * method which may touch any device, otherwise the kernel _will_ crash.
  */
-static void __init devicemaps_init(struct machine_desc *mdesc)
+static void __init devicemaps_init(const struct machine_desc *mdesc)
 {
 	struct map_desc map;
 	unsigned long addr;
@@ -1272,7 +1272,7 @@ static void __init map_lowmem(void)
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
-void __init paging_init(struct machine_desc *mdesc)
+void __init paging_init(const struct machine_desc *mdesc)
 {
 	void *zero_page;
 
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 1fa50100ab6a..34d4ab217bab 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -299,7 +299,7 @@ void __init sanity_check_meminfo(void)
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
-void __init paging_init(struct machine_desc *mdesc)
+void __init paging_init(const struct machine_desc *mdesc)
 {
 	early_trap_init((void *)CONFIG_VECTORS_BASE);
 	mpu_setup();
-- 
cgit v1.2.3
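
The "linker magic" mentioned above is the section-bounded array of
machine_desc records. As a minimal illustrative sketch (not part of the
patch; it assumes the __arch_info_begin/__arch_info_end bound symbols
from arch/arm/include/asm/mach/arch.h and a hypothetical lookup helper),
declaring those bounds const is what lets every walker use const
pointers:

#include <asm/mach/arch.h>	/* struct machine_desc */

/* Illustrative sketch only -- not taken from the patch above. */
extern const struct machine_desc __arch_info_begin[], __arch_info_end[];

static const struct machine_desc * __init find_machine_by_nr(unsigned int nr)
{
	const struct machine_desc *p;

	for (p = __arch_info_begin; p < __arch_info_end; p++)
		if (p->nr == nr)
			return p;	/* const pointer into the const section */

	return NULL;
}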


From 4bfab2034bab9374eba1921cf7bd51fd8d48661b Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@linaro.org>
Date: Fri, 26 Jul 2013 14:58:22 +0100
Subject: ARM: 7792/1: mm: Remove general hugetlb code from ARM

General forms of huge_pte_alloc, huge_pte_offset and follow_huge_pmd
are now available in mm/hugetlb.c.

This patch removes the ARM copies of these functions and activates
the general ones by enabling:
CONFIG_ARCH_WANT_GENERAL_HUGETLB

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig          |  3 +++
 arch/arm/mm/hugetlbpage.c | 43 -------------------------------------------
 2 files changed, 3 insertions(+), 43 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1e987404d2b0..1b3a30361f9c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1793,6 +1793,9 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
        def_bool y
        depends on ARM_LPAE
 
+config ARCH_WANT_GENERAL_HUGETLB
+	def_bool y
+
 source "mm/Kconfig"
 
 config FORCE_MAX_ZONEORDER
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index 3d1e4a205b0b..66781bf34077 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -36,22 +36,6 @@
  * of type casting from pmd_t * to pte_t *.
  */
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud))
-			pmd = pmd_offset(pud, addr);
-	}
-
-	return (pte_t *)pmd;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write)
 {
@@ -68,33 +52,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm,
-			unsigned long addr, unsigned long sz)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pte_t *pte = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	pud = pud_alloc(mm, pgd, addr);
-	if (pud)
-		pte = (pte_t *)pmd_alloc(mm, pud, addr);
-
-	return pte;
-}
-
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-- 
cgit v1.2.3
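
With CONFIG_ARCH_WANT_GENERAL_HUGETLB selected, the generic code in
mm/hugetlb.c supplies huge_pte_alloc(), huge_pte_offset() and
follow_huge_pmd() performing the same page-table walk the deleted ARM
copies did. A rough sketch of the offset lookup (mirroring the code
removed above, not a verbatim copy of the generic implementation):

/* Sketch mirroring the removed ARM walk; the generic version is equivalent. */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;
	pmd_t *pmd = NULL;

	if (pgd_present(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (pud_present(*pud))
			pmd = pmd_offset(pud, addr);	/* huge pages live at pmd level */
	}

	return (pte_t *)pmd;
}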


From 792a843a9f353d3e2474b6f5057b7eaecba41675 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 23 May 2013 18:24:21 +0100
Subject: ARM: mm: remove redundant dsb() prior to range TLB invalidation

The kernel TLB range invalidation functions already contain dsb
instructions before and after the maintenance, so there is no need to
introduce additional barriers.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/mm/dma-mapping.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 7f9b1798c6cf..0c17f69a8286 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -455,7 +455,6 @@ static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
 	unsigned end = start + size;
 
 	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
-	dsb();
 	flush_tlb_kernel_range(start, end);
 }
 
-- 
cgit v1.2.3


From f0915781bd5edf78b1154e61efe962dc15872d09 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 11 Feb 2013 13:47:48 +0000
Subject: ARM: tlb: don't perform inner-shareable invalidation for local TLB
 ops

Inner-shareable TLB invalidation is typically more expensive than local
(non-shareable) invalidation, so performing the broadcasting for
local_flush_tlb_* operations is a waste of cycles and needlessly
clobbers entries in the TLBs of other CPUs.

This patch introduces __flush_tlb_* versions for many of the TLB
invalidation functions, which only respect inner-shareable variants of
the invalidation instructions when presented with the TLB_V7_UIS_FULL
flag. The local version is also inlined to prevent SMP_ON_UP kernels
from missing flushes, where the __flush variant would be called with
the UP flags.

This gains us around 0.5% in hackbench scores for a dual-core A15, but I
would expect this to improve as more cores (and clusters) are added to
the equation.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Albin Tonnerre <Albin.Tonnerre@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/tlbflush.h | 138 ++++++++++++++++++++++++++++++++++------
 arch/arm/kernel/smp_tlb.c       |   8 +--
 arch/arm/mm/context.c           |   7 +-
 3 files changed, 123 insertions(+), 30 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index f467e9b3f8d5..3316264916e9 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -319,6 +319,16 @@ extern struct cpu_tlb_fns cpu_tlb;
 #define tlb_op(f, regs, arg)	__tlb_op(f, "p15, 0, %0, " regs, arg)
 #define tlb_l2_op(f, regs, arg)	__tlb_op(f, "p15, 1, %0, " regs, arg)
 
+static inline void __local_flush_tlb_all(void)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
+	tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
+	tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
+}
+
 static inline void local_flush_tlb_all(void)
 {
 	const int zero = 0;
@@ -327,10 +337,8 @@ static inline void local_flush_tlb_all(void)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
-	tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
-	tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
-	tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
+	__local_flush_tlb_all();
+	tlb_op(TLB_V7_UIS_FULL, "c8, c7, 0", zero);
 
 	if (tlb_flag(TLB_BARRIER)) {
 		dsb();
@@ -338,31 +346,69 @@ static inline void local_flush_tlb_all(void)
 	}
 }
 
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
+static inline void __flush_tlb_all(void)
 {
 	const int zero = 0;
-	const int asid = ASID(mm);
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
 	if (tlb_flag(TLB_WB))
 		dsb();
 
+	__local_flush_tlb_all();
+	tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
+
+	if (tlb_flag(TLB_BARRIER)) {
+		dsb();
+		isb();
+	}
+}
+
+static inline void __local_flush_tlb_mm(struct mm_struct *mm)
+{
+	const int zero = 0;
+	const int asid = ASID(mm);
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
 	if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
-		if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) {
+		if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 			tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero);
 			tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero);
 			tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero);
 		}
-		put_cpu();
 	}
 
 	tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid);
 	tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid);
 	tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	const int asid = ASID(mm);
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_mm(mm);
+	tlb_op(TLB_V7_UIS_ASID, "c8, c7, 2", asid);
+
+	if (tlb_flag(TLB_BARRIER))
+		dsb();
+}
+
+static inline void __flush_tlb_mm(struct mm_struct *mm)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_mm(mm);
 #ifdef CONFIG_ARM_ERRATA_720789
-	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero);
+	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", 0);
 #else
-	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid);
+	tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", ASID(mm));
 #endif
 
 	if (tlb_flag(TLB_BARRIER))
@@ -370,16 +416,13 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 }
 
 static inline void
-local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 {
 	const int zero = 0;
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
 	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
 
-	if (tlb_flag(TLB_WB))
-		dsb();
-
 	if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
 	    cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
 		tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr);
@@ -392,6 +435,36 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 	tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr);
 	tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr);
 	tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr);
+}
+
+static inline void
+local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_page(vma, uaddr);
+	tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", uaddr);
+
+	if (tlb_flag(TLB_BARRIER))
+		dsb();
+}
+
+static inline void
+__flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_page(vma, uaddr);
 #ifdef CONFIG_ARM_ERRATA_720789
 	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
 #else
@@ -402,16 +475,11 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		dsb();
 }
 
-static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
+static inline void __local_flush_tlb_kernel_page(unsigned long kaddr)
 {
 	const int zero = 0;
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
-	kaddr &= PAGE_MASK;
-
-	if (tlb_flag(TLB_WB))
-		dsb();
-
 	tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr);
 	tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr);
 	tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr);
@@ -421,6 +489,36 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr);
 	tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr);
 	tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr);
+}
+
+static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	kaddr &= PAGE_MASK;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_kernel_page(kaddr);
+	tlb_op(TLB_V7_UIS_PAGE, "c8, c7, 1", kaddr);
+
+	if (tlb_flag(TLB_BARRIER)) {
+		dsb();
+		isb();
+	}
+}
+
+static inline void __flush_tlb_kernel_page(unsigned long kaddr)
+{
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	kaddr &= PAGE_MASK;
+
+	if (tlb_flag(TLB_WB))
+		dsb();
+
+	__local_flush_tlb_kernel_page(kaddr);
 	tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
 
 	if (tlb_flag(TLB_BARRIER)) {
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index c2edfff573c2..5883b8ae77c8 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -104,7 +104,7 @@ void flush_tlb_all(void)
 	if (tlb_ops_need_broadcast())
 		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
 	else
-		local_flush_tlb_all();
+		__flush_tlb_all();
 	broadcast_tlb_a15_erratum();
 }
 
@@ -113,7 +113,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 	if (tlb_ops_need_broadcast())
 		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
-		local_flush_tlb_mm(mm);
+		__flush_tlb_mm(mm);
 	broadcast_tlb_mm_a15_erratum(mm);
 }
 
@@ -126,7 +126,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
 					&ta, 1);
 	} else
-		local_flush_tlb_page(vma, uaddr);
+		__flush_tlb_page(vma, uaddr);
 	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
 }
 
@@ -137,7 +137,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 		ta.ta_start = kaddr;
 		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
 	} else
-		local_flush_tlb_kernel_page(kaddr);
+		__flush_tlb_kernel_page(kaddr);
 	broadcast_tlb_a15_erratum();
 }
 
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 4a0544492f10..84e6f772e204 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -162,10 +162,7 @@ static void flush_context(unsigned int cpu)
 	}
 
 	/* Queue a TLB invalidate and flush the I-cache if necessary. */
-	if (!tlb_ops_need_broadcast())
-		cpumask_set_cpu(cpu, &tlb_flush_pending);
-	else
-		cpumask_setall(&tlb_flush_pending);
+	cpumask_setall(&tlb_flush_pending);
 
 	if (icache_is_vivt_asid_tagged())
 		__flush_icache_all();
@@ -245,8 +242,6 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
 		local_flush_bp_all();
 		local_flush_tlb_all();
-		if (erratum_a15_798181())
-			dummy_flush_tlb_a15_erratum();
 	}
 
 	atomic64_set(&per_cpu(active_asids, cpu), asid);
-- 
cgit v1.2.3
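
After this patch each operation comes in three flavours:
__local_flush_tlb_*() (the raw per-CPU ops, no barriers),
local_flush_tlb_*() (barriers plus the non-shareable v7 invalidate) and
__flush_tlb_*() (barriers plus the inner-shareable v7 invalidate). A
caller-side sketch of how the SMP wrappers pick a variant, following the
arch/arm/kernel/smp_tlb.c pattern above (flush_mm_example() is a
hypothetical name):

/* Hypothetical caller, modelled on flush_tlb_mm() in smp_tlb.c above. */
static void flush_mm_example(struct mm_struct *mm)
{
	if (tlb_ops_need_broadcast())
		/* pre-v7 SMP: every CPU must run its own local invalidate */
		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
	else
		/* ARMv7: a single inner-shareable invalidate reaches all CPUs */
		__flush_tlb_mm(mm);
}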


From 6abdd491698a27f7df04a32ca12cc453810e4396 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 13 May 2013 12:01:12 +0100
Subject: ARM: mm: use inner-shareable barriers for TLB and user cache
 operations

System-wide barriers aren't required for situations where we only need
to make visibility and ordering guarantees in the inner-shareable domain
(i.e. we are not dealing with devices or potentially incoherent CPUs).

This patch changes the v7 TLB operations, coherent_user_range and
dcache_clean_area functions to use inner-shareable barriers. For cache
maintenance, only the store access type is required to ensure completion.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/mm/cache-v7.S | 4 ++--
 arch/arm/mm/proc-v7.S  | 2 +-
 arch/arm/mm/tlb-v7.S   | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 515b00064da8..b5c467a65c27 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -282,7 +282,7 @@ ENTRY(v7_coherent_user_range)
 	add	r12, r12, r2
 	cmp	r12, r1
 	blo	1b
-	dsb
+	dsb	ishst
 	icache_line_size r2, r3
 	sub	r3, r2, #1
 	bic	r12, r0, r3
@@ -294,7 +294,7 @@ ENTRY(v7_coherent_user_range)
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
-	dsb
+	dsb	ishst
 	isb
 	mov	pc, lr
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 73398bcf9bd8..0b5462a941a6 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -83,7 +83,7 @@ ENTRY(cpu_v7_dcache_clean_area)
 	add	r0, r0, r2
 	subs	r1, r1, r2
 	bhi	2b
-	dsb
+	dsb	ishst
 	mov	pc, lr
 ENDPROC(cpu_v7_dcache_clean_area)
 
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index ea94765acf9a..355308767bae 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -35,7 +35,7 @@
 ENTRY(v7wbi_flush_user_tlb_range)
 	vma_vm_mm r3, r2			@ get vma->vm_mm
 	mmid	r3, r3				@ get vm_mm->context.id
-	dsb
+	dsb	ish
 	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
 	mov	r1, r1, lsr #PAGE_SHIFT
 	asid	r3, r3				@ mask ASID
@@ -56,7 +56,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	ish
 	mov	pc, lr
 ENDPROC(v7wbi_flush_user_tlb_range)
 
@@ -69,7 +69,7 @@ ENDPROC(v7wbi_flush_user_tlb_range)
  *	- end   - end address (exclusive, may not be aligned)
  */
 ENTRY(v7wbi_flush_kern_tlb_range)
-	dsb
+	dsb	ish
 	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
 	mov	r1, r1, lsr #PAGE_SHIFT
 	mov	r0, r0, lsl #PAGE_SHIFT
@@ -84,7 +84,7 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	add	r0, r0, #PAGE_SZ
 	cmp	r0, r1
 	blo	1b
-	dsb
+	dsb	ish
 	isb
 	mov	pc, lr
 ENDPROC(v7wbi_flush_kern_tlb_range)
-- 
cgit v1.2.3


From 9781aa8adbc13b9960b5a3a7353efc57eeb3697d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Jun 2013 09:59:59 +0100
Subject: ARM: l2x0: use -st dsb option for ordering writel_relaxed with unlock

writel_relaxed and spin_unlock are both store operations, so we only
need to enforce store ordering in the dsb.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/mm/cache-l2x0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index d70e0aba0c9d..0c3fc276bd30 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -290,7 +290,7 @@ static void l2x0_disable(void)
 	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	__l2x0_flush_all();
 	writel_relaxed(0, l2x0_base + L2X0_CTRL);
-	dsb();
+	dsb(st);
 	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-- 
cgit v1.2.3
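
The dsb(st) form relies on the barrier macro accepting an option
string; a sketch of the ARMv7 definition this assumes (along the lines
of arch/arm/include/asm/barrier.h), where "st" limits the wait to store
completion - enough to order writel_relaxed() before the unlock store:

/* Assumed ARMv7 form of the option-taking barrier macro (sketch). */
#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")

With this form, dsb(st) emits "dsb st", while the old bare dsb() emitted
a full-system "dsb".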


From 19a0519d3642d140bfb1bd602a34dc4f98606b19 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Fri, 16 Aug 2013 10:28:24 +0100
Subject: ARM: 7818/1: feroceon: Add suspend/resume operation

Add support for suspend/resume operations. The implemented procedures
are identical to the ones for ARM926.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig            |  2 +-
 arch/arm/mm/proc-feroceon.S | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 32506dfa5e73..57562f8d1f92 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2240,7 +2240,7 @@ source "kernel/power/Kconfig"
 
 config ARCH_SUSPEND_POSSIBLE
 	depends on !ARCH_S5PC100
-	depends on CPU_ARM920T || CPU_ARM926T || CPU_SA1100 || \
+	depends on CPU_ARM920T || CPU_ARM926T || CPU_FEROCEON || CPU_SA1100 || \
 		CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK
 	def_bool y
 
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index d5146b98c8d1..db79b62c92fb 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -514,6 +514,32 @@ ENTRY(cpu_feroceon_set_pte_ext)
 #endif
 	mov	pc, lr
 
+/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
+.globl	cpu_feroceon_suspend_size
+.equ	cpu_feroceon_suspend_size, 4 * 3
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_feroceon_do_suspend)
+	stmfd	sp!, {r4 - r6, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ PID
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(cpu_feroceon_do_suspend)
+
+ENTRY(cpu_feroceon_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
+	ldmia	r0, {r4 - r6}
+	mcr	p15, 0, r4, c13, c0, 0	@ PID
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_feroceon_do_resume)
+#endif
+
 	.type	__feroceon_setup, #function
 __feroceon_setup:
 	mov	r0, #0
-- 
cgit v1.2.3


From c477b8db45aa4c4976be22c807bf43d31fecf17d Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 16 Aug 2013 13:04:32 +0100
Subject: ARM: 7820/1: mm: cache-l2x0: Print the cache size in kB

Currently we have the following output from cache-l2x0:

l2x0: 16 ways, CACHE_ID 0x410000c7, AUX_CTRL 0x32070000, Cache size: 1048576 B

Using kB for the cache size can improve readability a bit:

l2x0: 16 ways, CACHE_ID 0x410000c7, AUX_CTRL 0x32070000, Cache size: 1024 kB

While at it use pr_info.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index d70e0aba0c9d..ad4e8825f91e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -417,9 +417,9 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		outer_cache.disable = l2x0_disable;
 	}
 
-	printk(KERN_INFO "%s cache controller enabled\n", type);
-	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
-			ways, cache_id, aux, l2x0_size);
+	pr_info("%s cache controller enabled\n", type);
+	pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n",
+		ways, cache_id, aux, l2x0_size >> 10);
 }
 
 #ifdef CONFIG_OF
-- 
cgit v1.2.3


From 505caa66fe8551c2c8421395c2e56a5bb02520ff Mon Sep 17 00:00:00 2001
From: Christian Daudt <csd@broadcom.com>
Date: Mon, 19 Aug 2013 23:00:45 +0100
Subject: ARM: 7821/1: DT: binding fixup to align with vendor-prefixes.txt

[ this is a follow-up to this discussion:
http://archive.arm.linux.org.uk/lurker/message/20130730.230827.a1ceb12a.en.html ]
This patch set renames all uses of the "bcm," binding prefix to
"brcm,"; the "bcm," names were added before it was realized that
"brcm" had already been standardized as the Broadcom vendor prefix
(in Documentation/devicetree/bindings/vendor-prefixes.txt).
This will not cause any churn on devices because none of these
bindings have made it into production yet.

Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Christian Daudt <csd@broadcom.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 Documentation/devicetree/bindings/arm/l2cc.txt | 4 +++-
 arch/arm/mm/cache-l2x0.c                       | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
index 69ddf9fad2dc..c0c7626fd0ff 100644
--- a/Documentation/devicetree/bindings/arm/l2cc.txt
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -16,9 +16,11 @@ Required properties:
      performs the same operation).
 	"marvell,"aurora-outer-cache: Marvell Controller designed to be
 	 compatible with the ARM one with outer cache mode.
-	"bcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
+	"brcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
 	offset needs to be added to the address before passing down to the L2
 	cache controller
+	"bcm,bcm11351-a2-pl310-cache": DEPRECATED by
+	                               "brcm,bcm11351-a2-pl310-cache"
 - cache-unified : Specifies the cache is a unified cache.
 - cache-level : Should be set to 2 for a level 2 cache.
 - reg : Physical base address and size of cache controller's memory mapped
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index ad4e8825f91e..f6a4bb2d2551 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -929,7 +929,9 @@ static const struct of_device_id l2x0_ids[] __initconst = {
 	  .data = (void *)&aurora_no_outer_data},
 	{ .compatible = "marvell,aurora-outer-cache",
 	  .data = (void *)&aurora_with_outer_data},
-	{ .compatible = "bcm,bcm11351-a2-pl310-cache",
+	{ .compatible = "brcm,bcm11351-a2-pl310-cache",
+	  .data = (void *)&bcm_l2x0_data},
+	{ .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */
 	  .data = (void *)&bcm_l2x0_data},
 	{}
 };
-- 
cgit v1.2.3


From 84b6504f560157ff2077dd3757eee481b81dc39b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 20 Aug 2013 17:29:55 +0100
Subject: ARM: 7823/1: errata: workaround Cortex-A15 erratum 773022

On Cortex-A15 CPUs up to and including r0p4, in certain rare sequences
of code, the loop buffer may deliver incorrect instructions. This
workaround disables the loop buffer to avoid the erratum.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig      |  9 +++++++++
 arch/arm/mm/proc-v7.S | 14 +++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 57562f8d1f92..bd2709d55678 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1373,6 +1373,15 @@ config ARM_ERRATA_798181
 	  which sends an IPI to the CPUs that are running the same ASID
 	  as the one being invalidated.
 
+config ARM_ERRATA_773022
+	bool "ARM errata: incorrect instructions may be executed from loop buffer"
+	depends on CPU_V7
+	help
+	  This option enables the workaround for the 773022 Cortex-A15
+	  (up to r0p4) erratum. In certain rare sequences of code, the
+	  loop buffer may deliver incorrect instructions. This
+	  workaround disables the loop buffer to avoid the erratum.
+
 endmenu
 
 source "arch/arm/common/Kconfig"
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 5c6d5a3050ea..e7d45d529a21 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -329,7 +329,19 @@ __v7_setup:
 1:
 #endif
 
-3:	mov	r10, #0
+	/* Cortex-A15 Errata */
+3:	ldr	r10, =0x00000c0f		@ Cortex-A15 primary part number
+	teq	r0, r10
+	bne	4f
+
+#ifdef CONFIG_ARM_ERRATA_773022
+	cmp	r6, #0x4			@ only present up to r0p4
+	mrcle	p15, 0, r10, c1, c0, 1		@ read aux control register
+	orrle	r10, r10, #1 << 1		@ disable loop buffer
+	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
+#endif
+
+4:	mov	r10, #0
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 	dsb
 #ifdef CONFIG_MMU
-- 
cgit v1.2.3