diff options
Diffstat (limited to 'arch/tile/mm')
-rw-r--r-- | arch/tile/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/tile/mm/homecache.c | 38 | ||||
-rw-r--r-- | arch/tile/mm/init.c | 34 | ||||
-rw-r--r-- | arch/tile/mm/migrate_32.S | 1 | ||||
-rw-r--r-- | arch/tile/mm/pgtable.c | 181 |
5 files changed, 189 insertions, 73 deletions
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index dcebfc831cd6..758f597f488c 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -655,14 +655,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, } /* - * NOTE: the one other type of access that might bring us here - * are the memory ops in __tns_atomic_acquire/__tns_atomic_release, - * but we don't have to check specially for them since we can - * always safely return to the address of the fault and retry, - * since no separate atomic locks are involved. - */ - - /* * Now that we have released the atomic lock (if necessary), * it's safe to spin if the PTE that caused the fault was migrating. */ diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index d78df3a6ee15..cbe6f4f9eca3 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, panic("Unsafe to continue."); } +void flush_remote_page(struct page *page, int order) +{ + int i, pages = (1 << order); + for (i = 0; i < pages; ++i, ++page) { + void *p = kmap_atomic(page); + int hfh = 0; + int home = page_home(page); +#if CHIP_HAS_CBOX_HOME_MAP() + if (home == PAGE_HOME_HASH) + hfh = 1; + else +#endif + BUG_ON(home < 0 || home >= NR_CPUS); + finv_buffer_remote(p, PAGE_SIZE, hfh); + kunmap_atomic(p); + } +} + void homecache_evict(const struct cpumask *mask) { flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); } -/* Return a mask of the cpus whose caches currently own these pages. */ -static void homecache_mask(struct page *page, int pages, - struct cpumask *home_mask) +/* + * Return a mask of the cpus whose caches currently own these pages. + * The return value is whether the pages are all coherently cached + * (i.e. none are immutable, incoherent, or uncached). + */ +static int homecache_mask(struct page *page, int pages, + struct cpumask *home_mask) { int i; + int cached_coherently = 1; cpumask_clear(home_mask); for (i = 0; i < pages; ++i) { int home = page_home(&page[i]); if (home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT) { cpumask_copy(home_mask, cpu_possible_mask); - return; + return 0; } #if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_HASH) { @@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages, continue; } #endif - if (home == PAGE_HOME_UNCACHED) + if (home == PAGE_HOME_UNCACHED) { + cached_coherently = 0; continue; + } BUG_ON(home < 0 || home >= NR_CPUS); cpumask_set_cpu(home, home_mask); } + return cached_coherently; } /* @@ -386,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home) pte_t *ptep = virt_to_pte(NULL, kva); pte_t pteval = *ptep; BUG_ON(!pte_present(pteval) || pte_huge(pteval)); - *ptep = pte_set_home(pteval, home); + __set_pte(ptep, pte_set_home(pteval, home)); } } diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 0b9ce69b0ee5..d6e87fda2fb2 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -53,22 +53,11 @@ #include "migrate.h" -/* - * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)" - * in the Tile Kconfig, but this generates configure warnings. - * Do it here and force people to get it right to compile this file. - * The problem is that with 4KB small pages and 16MB huge pages, - * the default value doesn't allow us to group enough small pages - * together to make up a huge page. - */ -#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1 -# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size" -#endif - #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) #ifndef __tilegx__ unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; +EXPORT_SYMBOL(VMALLOC_RESERVE); #endif DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -445,7 +434,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) /* Temporary page table we use for staging. */ static pgd_t pgtables[PTRS_PER_PGD] - __attribute__((section(".init.page"))); + __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); /* * This maps the physical memory to kernel virtual address space, a total @@ -653,6 +642,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) memcpy(pgd_base, pgtables, sizeof(pgtables)); __install_page_table(pgd_base, __get_cpu_var(current_asid), swapper_pgprot); + + /* + * We just read swapper_pgprot and thus brought it into the cache, + * with its new home & caching mode. When we start the other CPUs, + * they're going to reference swapper_pgprot via their initial fake + * VA-is-PA mappings, which cache everything locally. At that + * time, if it's in our cache with a conflicting home, the + * simulator's coherence checker will complain. So, flush it out + * of our cache; we're not going to ever use it again anyway. + */ + __insn_finv(&swapper_pgprot); } /* @@ -950,11 +950,7 @@ struct kmem_cache *pgd_cache; void __init pgtable_cache_init(void) { - pgd_cache = kmem_cache_create("pgd", - PTRS_PER_PGD*sizeof(pgd_t), - PTRS_PER_PGD*sizeof(pgd_t), - 0, - NULL); + pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL); if (!pgd_cache) panic("pgtable_cache_init(): Cannot create pgd cache"); } @@ -989,7 +985,7 @@ static long __write_once initfree = 1; static int __init set_initfree(char *str) { long val; - if (strict_strtol(str, 0, &val)) { + if (strict_strtol(str, 0, &val) == 0) { initfree = val; pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index f738765cd1e6..ac01a7cdf77f 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S @@ -18,6 +18,7 @@ #include <linux/linkage.h> #include <linux/threads.h> #include <asm/page.h> +#include <asm/thread_info.h> #include <asm/types.h> #include <asm/asm-offsets.h> #include <hv/hypervisor.h> diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 1f5430c53d0d..1a2b36f8866d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) } #endif +/** + * shatter_huge_page() - ensure a given address is mapped by a small page. + * + * This function converts a huge PTE mapping kernel LOWMEM into a bunch + * of small PTEs with the same caching. No cache flush required, but we + * must do a global TLB flush. + * + * Any caller that wishes to modify a kernel mapping that might + * have been made with a huge page should call this function, + * since doing so properly avoids race conditions with installing the + * newly-shattered page and then flushing all the TLB entries. + * + * @addr: Address at which to shatter any existing huge page. + */ +void shatter_huge_page(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long flags = 0; /* happy compiler */ +#ifdef __PAGETABLE_PMD_FOLDED + struct list_head *pos; +#endif + + /* Get a pointer to the pmd entry that we need to change. */ + addr &= HPAGE_MASK; + BUG_ON(pgd_addr_invalid(addr)); + BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ + pgd = swapper_pg_dir + pgd_index(addr); + pud = pud_offset(pgd, addr); + BUG_ON(!pud_present(*pud)); + pmd = pmd_offset(pud, addr); + BUG_ON(!pmd_present(*pmd)); + if (!pmd_huge_page(*pmd)) + return; + + /* + * Grab the pgd_lock, since we may need it to walk the pgd_list, + * and since we need some kind of lock here to avoid races. + */ + spin_lock_irqsave(&pgd_lock, flags); + if (!pmd_huge_page(*pmd)) { + /* Lost the race to convert the huge page. */ + spin_unlock_irqrestore(&pgd_lock, flags); + return; + } + + /* Shatter the huge page into the preallocated L2 page table. */ + pmd_populate_kernel(&init_mm, pmd, + get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + +#ifdef __PAGETABLE_PMD_FOLDED + /* Walk every pgd on the system and update the pmd there. */ + list_for_each(pos, &pgd_list) { + pmd_t *copy_pmd; + pgd = list_to_pgd(pos) + pgd_index(addr); + pud = pud_offset(pgd, addr); + copy_pmd = pmd_offset(pud, addr); + __set_pmd(copy_pmd, *pmd); + } +#endif + + /* Tell every cpu to notice the change. */ + flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, + cpu_possible_mask, NULL, 0); + + /* Hold the lock until the TLB flush is finished to avoid races. */ + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* * List of all pgd's needed so it can invalidate entries in both cached * and uncached pgd's. This is essentially codepath-based locking @@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd) BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); #endif - clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, - swapper_pg_dir + KERNEL_PGD_INDEX_START, - KERNEL_PGD_PTRS); + memcpy(pgd + KERNEL_PGD_INDEX_START, + swapper_pg_dir + KERNEL_PGD_INDEX_START, + KERNEL_PGD_PTRS * sizeof(pgd_t)); pgd_list_add(pgd); spin_unlock_irqrestore(&pgd_lock, flags); @@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; + gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; struct page *p; +#if L2_USER_PGTABLE_ORDER > 0 + int i; +#endif #ifdef CONFIG_HIGHPTE flags |= __GFP_HIGHMEM; @@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) if (p == NULL) return NULL; +#if L2_USER_PGTABLE_ORDER > 0 + /* + * Make every page have a page_count() of one, not just the first. + * We don't use __GFP_COMP since it doesn't look like it works + * correctly with tlb_remove_page(). + */ + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + init_page_count(p+i); + inc_zone_page_state(p+i, NR_PAGETABLE); + } +#endif + pgtable_page_ctor(p); return p; } @@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) */ void pte_free(struct mm_struct *mm, struct page *p) { + int i; + pgtable_page_dtor(p); - __free_pages(p, L2_USER_PGTABLE_ORDER); + __free_page(p); + + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + __free_page(p+i); + dec_zone_page_state(p+i, NR_PAGETABLE); + } } void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, @@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, int i; pgtable_page_dtor(pte); - tlb->need_flush = 1; - if (tlb_fast_mode(tlb)) { - struct page *pte_pages[L2_USER_PGTABLE_PAGES]; - for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) - pte_pages[i] = pte + i; - free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES); - return; - } - for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) { - tlb->pages[tlb->nr++] = pte + i; - if (tlb->nr >= FREE_PTE_NR) - tlb_flush_mmu(tlb, 0, 0); + tlb_remove_page(tlb, pte); + + for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { + tlb_remove_page(tlb, pte + i); + dec_zone_page_state(pte + i, NR_PAGETABLE); } } @@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot) return x + y * smp_width; } -void set_pte_order(pte_t *ptep, pte_t pte, int order) +/* + * Convert a kernel VA to a PA and homing information. + */ +int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) { - unsigned long pfn = pte_pfn(pte); - struct page *page = pfn_to_page(pfn); + struct page *page = virt_to_page(va); + pte_t null_pte = { 0 }; - /* Update the home of a PTE if necessary */ - pte = pte_set_home(pte, page_home(page)); + *cpa = __pa(va); + /* Note that this is not writing a page table, just returning a pte. */ + *pte = pte_set_home(null_pte, page_home(page)); + + return 0; /* return non-zero if not hfh? */ +} +EXPORT_SYMBOL(va_to_cpa_and_pte); + +void __set_pte(pte_t *ptep, pte_t pte) +{ #ifdef __tilegx__ *ptep = pte; #else - /* - * When setting a PTE, write the high bits first, then write - * the low bits. This sets the "present" bit only after the - * other bits are in place. If a particular PTE update - * involves transitioning from one valid PTE to another, it - * may be necessary to call set_pte_order() more than once, - * transitioning via a suitable intermediate state. - * Note that this sequence also means that if we are transitioning - * from any migrating PTE to a non-migrating one, we will not - * see a half-updated PTE with the migrating bit off. - */ -#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 -# error Must write the present and migrating bits last -#endif - ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); - barrier(); - ((u32 *)ptep)[0] = (u32)(pte_val(pte)); -#endif +# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 +# error Must write the present and migrating bits last +# endif + if (pte_present(pte)) { + ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); + barrier(); + ((u32 *)ptep)[0] = (u32)(pte_val(pte)); + } else { + ((u32 *)ptep)[0] = (u32)(pte_val(pte)); + barrier(); + ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); + } +#endif /* __tilegx__ */ +} + +void set_pte(pte_t *ptep, pte_t pte) +{ + struct page *page = pfn_to_page(pte_pfn(pte)); + + /* Update the home of a PTE if necessary */ + pte = pte_set_home(pte, page_home(page)); + + __set_pte(ptep, pte); } /* Can this mm load a PTE with cached_priority set? */ |