From e651eab0af88aa7a281fe9e8c36c0846552aa7fc Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 18 Mar 2013 12:24:04 +0100 Subject: ARM: 7677/1: LPAE: Fix mapping in alloc_init_section for unaligned addresses With LPAE enabled, alloc_init_section() does not map the entire address space for unaligned addresses. The issue also reproduced with CMA + LPAE. CMA tries to map 16MB with page granularity mappings during boot. alloc_init_pte() is called and out of 16MB, only 2MB gets mapped and rest remains unaccessible. Because of this OMAP5 boot is broken with CMA + LPAE enabled. Fix the issue by ensuring that the entire addresses are mapped. Signed-off-by: R Sricharan Cc: Catalin Marinas Cc: Christoffer Dall Cc: Santosh Shilimkar Tested-by: Laura Abbott Acked-by: Catalin Marinas Acked-by: Christoffer Dall Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 73 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 26 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e95a996ab78f..78978945492a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -598,39 +598,60 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - const struct mem_type *type) +static void __init map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pud, addr); - +#ifndef CONFIG_ARM_LPAE /* - * Try a section mapping - end, addr and phys must all be aligned - * to a section boundary. Note that PMDs refer to the individual - * L1 entries, whereas PGDs refer to a group of L1 entries making - * up one logical pointer to an L2 table. + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) */ - if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { - pmd_t *p = pmd; - -#ifndef CONFIG_ARM_LPAE - if (addr & SECTION_SIZE) - pmd++; + if (addr & SECTION_SIZE) + pmd++; #endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); - do { - *pmd = __pmd(phys | type->prot_sect); - phys += SECTION_SIZE; - } while (pmd++, addr += SECTION_SIZE, addr != end); + flush_pmd_entry(pmd); +} - flush_pmd_entry(p); - } else { +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { /* - * No need to loop; pte's aren't interested in the - * individual L1 entries. + * With LPAE, we must loop over to map + * all the pmds for the given range. */ - alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); - } + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. + */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, @@ -641,7 +662,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, do { next = pud_addr_end(addr, end); - alloc_init_section(pud, addr, next, phys, type); + alloc_init_pmd(pud, addr, next, phys, type); phys += next - addr; } while (pud++, addr = next, addr != end); } -- cgit v1.2.3 From 8164f7af88d9ad3a757bd14f634b23997ee77f6b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 18 Mar 2013 19:44:15 +0100 Subject: ARM: 7680/1: Detect support for SDIV/UDIV from ISAR0 register The ISAR0 register indicates support for the SDIV and UDIV instructions in both the Thumb and ARM instruction set. Read the register to detect the supported instructions and update the elf_hwcap mask as appropriate. This is better than adding more and more cpuid checks in proc-v7.S for each new cpu variant that supports these instructions. Acked-by: Will Deacon Cc: Stepan Moskovchenko Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 20 ++++++++++++++++++++ arch/arm/mm/proc-v7.S | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e2c8bbffb0b1..f3ac13f69b7a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -353,6 +353,23 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +static void __init cpuid_init_hwcaps(void) +{ + unsigned int divide_instrs; + + if (cpu_architecture() < CPU_ARCH_ARMv7) + return; + + divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; + + switch (divide_instrs) { + case 2: + elf_hwcap |= HWCAP_IDIVA; + case 1: + elf_hwcap |= HWCAP_IDIVT; + } +} + static void __init feat_v6_fixup(void) { int id = read_cpuid_id(); @@ -483,6 +500,9 @@ static void __init setup_processor(void) snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", list->elf_name, ENDIANNESS); elf_hwcap = list->elf_hwcap; + + cpuid_init_hwcaps(); + #ifndef CONFIG_ARM_THUMB elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3a3c015f8d5c..bcd3d48922fb 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -420,7 +420,7 @@ __v7_pj4b_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -430,7 +430,7 @@ __v7_ca7mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* -- cgit v1.2.3 From 120ecfafabec382c4feb79ff159ef42a39b6d33b Mon Sep 17 00:00:00 2001 From: Stepan Moskovchenko Date: Mon, 18 Mar 2013 19:44:16 +0100 Subject: ARM: 7678/1: Work around faulty ISAR0 register in some Krait CPUs Some early versions of the Krait CPU design incorrectly indicate that they only support the UDIV and SDIV instructions in Thumb mode when they actually support them in ARM and Thumb mode. It seems that these CPUs follow the DDI0406B ARM ARM which has two possible values for the divide instructions field, instead of the DDI0406C document which has three possible values. Work around this problem by checking the MIDR against Krait CPUs with this faulty ISAR0 register and force the hwcaps to indicate support in both modes. [sboyd: Rewrote commit text to reflect real reasoning now that we autodetect udiv/sdiv] Signed-off-by: Stepan Moskovchenko Acked-by: Will Deacon Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/mm/proc-v7.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index bcd3d48922fb..f584d3f5b37c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -433,6 +433,21 @@ __v7_ca15mp_proc_info: __v7_proc __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info + /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + /* * Match any ARMv7 processor core. */ -- cgit v1.2.3 From 6e7aceeb7c70b9ebad79bcfe91fcf738826e8e6d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Mar 2013 17:02:48 +0100 Subject: ARM: 7682/1: cache-l2x0: fix masking of RTL revision numbering and set_debug init Commit b8db6b8 (ARM: 7547/4: cache-l2x0: add support for Aurora L2 cache ctrl) moved the masking of the part ID which caused the RTL version to be lost. Commit 6248d06 (ARM: 7545/1: cache-l2x0: make outer_cache_fns a field of l2x0_of_data) changed how .set_debug is initialized. Both commits break commit 74ddcdb (ARM: 7608/1: l2x0: Only set .set_debug on PL310 r3p0 and earlier) which uses the RTL version to conditionally set .set_debug function pointer. Commit b8db6b8 also caused the printed cache ID to be missing the version information. Fix this by reverting how the part number is masked so the RTL version info is maintained. The cache-id-part DT property does not set the RTL bits so masking them should have no effect. Also, re-arrange the order of the function pointer init so the .set_debug function can be overridden. Reported-by: Paolo Pisati Signed-off-by: Rob Herring Cc: Gregory CLEMENT Cc: Yehuda Yitschak Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c2f37390308a..c465faca51b0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -299,7 +299,7 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: lockregs = 8; break; @@ -333,15 +333,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) if (cache_id_part_number_from_dt) cache_id = cache_id_part_number_from_dt; else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) - & L2X0_CACHE_ID_PART_MASK; + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -725,7 +724,6 @@ static const struct l2x0_of_data pl310_data = { .flush_all = l2x0_flush_all, .inv_all = l2x0_inv_all, .disable = l2x0_disable, - .set_debug = pl310_set_debug, }, }; @@ -814,9 +812,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data->save(); of_init = true; - l2x0_init(l2x0_base, aux_val, aux_mask); - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + l2x0_init(l2x0_base, aux_val, aux_mask); return 0; } -- cgit v1.2.3 From 93dc68876b608da041fe40ed39424b0fcd5aa2fb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 26 Mar 2013 23:35:04 +0100 Subject: ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations) On Cortex-A15 (r0p0..r3p2) the TLBI/DSB are not adequately shooting down all use of the old entries. This patch implements the erratum workaround which consists of: 1. Dummy TLBIMVAIS and DSB on the CPU doing the TLBI operation. 2. Send IPI to the CPUs that are running the same mm (and ASID) as the one being invalidated (or all the online CPUs for global pages). 3. CPU receiving the IPI executes a DMB and CLREX (part of the exception return code already). Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig | 10 ++++++ arch/arm/include/asm/highmem.h | 7 ++++ arch/arm/include/asm/mmu_context.h | 2 ++ arch/arm/include/asm/tlbflush.h | 15 +++++++++ arch/arm/kernel/smp_tlb.c | 66 ++++++++++++++++++++++++++++++++++++++ arch/arm/mm/context.c | 3 +- 6 files changed, 102 insertions(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 12ea3b3d49a9..1cacda426a0e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420 to deadlock. This workaround puts DSB before executing ISB if an abort may occur on cache maintenance. +config ARM_ERRATA_798181 + bool "ARM errata: TLBI/DSB failure on Cortex-A15" + depends on CPU_V7 && SMP + help + On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not + adequately shooting down all use of the old entries. This + option enables the Linux kernel workaround for this erratum + which sends an IPI to the CPUs that are running the same ASID + as the one being invalidated. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 8c5e828f484d..91b99abe7a95 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page); #endif #endif +/* + * Needed to be able to broadcast the TLB invalidation for kmap. + */ +#ifdef CONFIG_ARM_ERRATA_798181 +#undef ARCH_NEEDS_KMAP_HIGH_GET +#endif + #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); #else diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 863a6611323c..a7b85e0d0cc1 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -27,6 +27,8 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +DECLARE_PER_CPU(atomic64_t, active_asids); + #else /* !CONFIG_CPU_HAS_ASID */ #ifdef CONFIG_MMU diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 4db8c8820f0d..9e9c041358ca 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -450,6 +450,21 @@ static inline void local_flush_bp_all(void) isb(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static inline void dummy_flush_tlb_a15_erratum(void) +{ + /* + * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. + */ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(); +} +#else +static inline void dummy_flush_tlb_a15_erratum(void) +{ +} +#endif + /* * flush_pmd_entry * diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index bd0300531399..e82e1d248772 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -12,6 +12,7 @@ #include #include +#include /**********************************************************************/ @@ -69,12 +70,72 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} +#else +static int erratum_a15_798181(void) +{ + return 0; +} +#endif + +static void ipi_flush_tlb_a15_erratum(void *arg) +{ + dmb(); +} + +static void broadcast_tlb_a15_erratum(void) +{ + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum, + NULL, 1); +} + +static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) +{ + int cpu; + cpumask_t mask = { CPU_BITS_NONE }; + + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + /* + * We only need to send an IPI if the other CPUs are running + * the same ASID as the one being invalidated. There is no + * need for locking around the active_asids check since the + * switch_mm() function has at least one dmb() (as required by + * this workaround) in case a context switch happens on + * another CPU after the condition below. + */ + if (atomic64_read(&mm->context.id) == + atomic64_read(&per_cpu(active_asids, cpu))) + cpumask_set_cpu(cpu, &mask); + } + smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); +} + void flush_tlb_all(void) { if (tlb_ops_need_broadcast()) on_each_cpu(ipi_flush_tlb_all, NULL, 1); else local_flush_tlb_all(); + broadcast_tlb_a15_erratum(); } void flush_tlb_mm(struct mm_struct *mm) @@ -83,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm) on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); else local_flush_tlb_mm(mm); + broadcast_tlb_mm_a15_erratum(mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) @@ -95,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) &ta, 1); } else local_flush_tlb_page(vma, uaddr); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_page(unsigned long kaddr) @@ -105,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); } else local_flush_tlb_kernel_page(kaddr); + broadcast_tlb_a15_erratum(); } void flush_tlb_range(struct vm_area_struct *vma, @@ -119,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma, &ta, 1); } else local_flush_tlb_range(vma, start, end); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -130,6 +195,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); } else local_flush_tlb_kernel_range(start, end); + broadcast_tlb_a15_erratum(); } void flush_bp_all(void) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index a5a4b2bc42ba..2ac37372ef52 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -static DEFINE_PER_CPU(atomic64_t, active_asids); +DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -215,6 +215,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); -- cgit v1.2.3