diff options
Diffstat (limited to 'arch/powerpc/include/asm/book3s')
20 files changed, 1952 insertions, 798 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index 264b754d65b0..880db13a2e9f 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -39,8 +39,5 @@ #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) -/* Hash table based platforms need atomic updates of the linux PTE */ -#define PTE_ATOMIC_UPDATES 1 - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 16f513e5cbd7..b82e063494dd 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_MMU_HASH32_H_ -#define _ASM_POWERPC_MMU_HASH32_H_ +#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ +#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ /* * 32-bit hash table MMU support */ @@ -90,4 +90,4 @@ typedef struct { #define mmu_virtual_psize MMU_PAGE_4K #define mmu_linear_psize MMU_PAGE_256M -#endif /* _ASM_POWERPC_MMU_HASH32_H_ */ +#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */ diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h new file mode 100644 index 000000000000..a2350194fc76 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -0,0 +1,109 @@ +#ifndef _ASM_POWERPC_BOOK3S_32_PGALLOC_H +#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H + +#include <linux/threads.h> + +/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ +#define MAX_PGTABLE_INDEX_SIZE 0 + +extern void __bad_pte(pmd_t *pmd); + +extern pgd_t *pgd_alloc(struct mm_struct *mm); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +/* + * We don't have any real pmd's, and this code never triggers because + * the pgd will always be present.. + */ +/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */ +#define pmd_free(mm, x) do { } while (0) +#define __pmd_free_tlb(tlb,x,a) do { } while (0) +/* #define pgd_populate(mm, pmd, pte) BUG() */ + +#ifndef CONFIG_BOOKE + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, + pte_t *pte) +{ + *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pte_page) +{ + *pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT); +} + +#define pmd_pgtable(pmd) pmd_page(pmd) +#else + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, + pte_t *pte) +{ + *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pte_page) +{ + *pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT); +} + +#define pmd_pgtable(pmd) pmd_page(pmd) +#endif + +extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); +extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr); + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) +{ + pgtable_page_dtor(ptepage); + __free_page(ptepage); +} + +static inline void pgtable_free(void *table, unsigned index_size) +{ + BUG_ON(index_size); /* 32-bit doesn't use this */ + free_page((unsigned long)table); +} + +#define check_pgt_cache() do { } while (0) + +#ifdef CONFIG_SMP +static inline void pgtable_free_tlb(struct mmu_gather *tlb, + void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + pgtable_free(table, shift); +} +#else +static inline void pgtable_free_tlb(struct mmu_gather *tlb, + void *table, int shift) +{ + pgtable_free(table, shift); +} +#endif + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +{ + tlb_flush_pgtable(tlb, address); + pgtable_page_dtor(table); + pgtable_free_tlb(tlb, page_address(table), 0); +} +#endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 5f08a0832238..1af837c561ba 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -5,58 +5,31 @@ * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned. */ -#define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 7 -#define PUD_INDEX_SIZE 9 -#define PGD_INDEX_SIZE 9 +#define H_PTE_INDEX_SIZE 9 +#define H_PMD_INDEX_SIZE 7 +#define H_PUD_INDEX_SIZE 9 +#define H_PGD_INDEX_SIZE 9 #ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) -#endif /* __ASSEMBLY__ */ - -#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) -#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) -#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) - -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) +#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) +#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) +#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) +#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) /* With 4k base page size, hugepage PTEs go at the PMD level */ #define MIN_HUGEPTE_SHIFT PMD_SHIFT -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* Bits to mask out from a PMD to get to the PTE page */ -#define PMD_MASKED_BITS 0 -/* Bits to mask out from a PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0 -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0 - /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ - _PAGE_F_SECOND | _PAGE_F_GIX) - -/* shift to put page number into pte */ -#define PTE_RPN_SHIFT (12) -#define PTE_RPN_SIZE (45) /* gives 57-bit real addresses */ - -#define _PAGE_4K_PFN 0 -#ifndef __ASSEMBLY__ +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ + H_PAGE_F_SECOND | H_PAGE_F_GIX) +/* + * Not supported by 4k linux page size + */ +#define H_PAGE_4K_PFN 0x0 +#define H_PAGE_THP_HUGE 0x0 +#define H_PAGE_COMBO 0x0 +#define H_PTE_FRAG_NR 0 +#define H_PTE_FRAG_SIZE_SHIFT 0 /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ @@ -64,37 +37,76 @@ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot)) #ifdef CONFIG_HUGETLB_PAGE -/* - * For 4k page size, we support explicit hugepage via hugepd - */ -static inline int pmd_huge(pmd_t pmd) +static inline int hash__hugepd_ok(hugepd_t hpd) +{ + /* + * if it is not a pte and have hugepd shift mask + * set, then it is a hugepd directory pointer + */ + if (!(hpd.pd & _PAGE_PTE) && + ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)) + return true; + return false; +} +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +static inline char *get_hpte_slot_array(pmd_t *pmdp) +{ + BUG(); + return NULL; +} + +static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index) { + BUG(); return 0; } -static inline int pud_huge(pud_t pud) +static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array, + int index) { + BUG(); return 0; } -static inline int pgd_huge(pgd_t pgd) +static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, + unsigned int index, unsigned int hidx) +{ + BUG(); +} + +static inline int hash__pmd_trans_huge(pmd_t pmd) { return 0; } -#define pgd_huge pgd_huge -static inline int hugepd_ok(hugepd_t hpd) +static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b) { - /* - * if it is not a pte and have hugepd shift mask - * set, then it is a hugepd directory pointer - */ - if (!(hpd.pd & _PAGE_PTE) && - ((hpd.pd & HUGEPD_SHIFT_MASK) != 0)) - return true; - return false; + BUG(); + return 0; } -#define is_hugepd(hpd) (hugepd_ok(hpd)) + +static inline pmd_t hash__pmd_mkhuge(pmd_t pmd) +{ + BUG(); + return pmd; +} + +extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + unsigned long clr, unsigned long set); +extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); +extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); +extern int hash__has_transparent_hugepage(void); #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 0a7956a80a08..5aae4f530c21 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -1,73 +1,44 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#define PTE_INDEX_SIZE 8 -#define PMD_INDEX_SIZE 5 -#define PUD_INDEX_SIZE 5 -#define PGD_INDEX_SIZE 12 - -#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) -#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) -#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) +#define H_PTE_INDEX_SIZE 8 +#define H_PMD_INDEX_SIZE 5 +#define H_PUD_INDEX_SIZE 5 +#define H_PGD_INDEX_SIZE 12 /* With 4k base page size, hugepage PTEs go at the PMD level */ #define MIN_HUGEPTE_SHIFT PAGE_SHIFT -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */ -#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ +#define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ +#define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* - * Used to track subpage group valid if _PAGE_COMBO is set - * This overloads _PAGE_F_GIX and _PAGE_F_SECOND + * We need to differentiate between explicit huge page and THP huge + * page, since THP huge page also need to track real subpage details */ -#define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND) +#define H_PAGE_THP_HUGE H_PAGE_4K_PFN -/* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \ - _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO) - -/* Shift to put page number into pte. - * - * That gives us a max RPN of 41 bits, which means a max of 57 bits - * of addressable physical space, or 53 bits for the special 4k PFNs. +/* + * Used to track subpage group valid if H_PAGE_COMBO is set + * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND */ -#define PTE_RPN_SHIFT (16) -#define PTE_RPN_SIZE (41) +#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ + H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) /* * we support 16 fragments per PTE page of 64K size. */ -#define PTE_FRAG_NR 16 +#define H_PTE_FRAG_NR 16 /* * We use a 2K PTE page fragment and another 2K for storing * real_pte_t hash index */ -#define PTE_FRAG_SIZE_SHIFT 12 +#define H_PTE_FRAG_SIZE_SHIFT 12 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) -/* Bits to mask out from a PMD to get to the PTE page */ -#define PMD_MASKED_BITS 0xc0000000000000ffUL -/* Bits to mask out from a PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0xc0000000000000ffUL -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0xc0000000000000ffUL - #ifndef __ASSEMBLY__ +#include <asm/errno.h> /* * With 64K pages on hash table, we have a special PTE format that @@ -83,9 +54,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) rpte.pte = pte; rpte.hidx = 0; - if (pte_val(pte) & _PAGE_COMBO) { + if (pte_val(pte) & H_PAGE_COMBO) { /* - * Make sure we order the hidx load against the _PAGE_COMBO + * Make sure we order the hidx load against the H_PAGE_COMBO * check. The store side ordering is done in __hash_page_4K */ smp_rmb(); @@ -97,9 +68,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - if ((pte_val(rpte.pte) & _PAGE_COMBO)) + if ((pte_val(rpte.pte) & H_PAGE_COMBO)) return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf; + return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; } #define __rpte_to_pte(r) ((r).pte) @@ -122,79 +93,32 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); #define pte_iterate_hashed_end() } while(0); } } while(0) #define pte_pagesize_index(mm, addr, pte) \ - (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) - -#define remap_4k_pfn(vma, addr, pfn, prot) \ - (WARN_ON(((pfn) >= (1UL << PTE_RPN_SIZE))) ? -EINVAL : \ - remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \ - __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN))) - -#define PTE_TABLE_SIZE PTE_FRAG_SIZE -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) -#else -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) -#endif -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) - -#ifdef CONFIG_HUGETLB_PAGE -/* - * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have - * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; - * - * Defined in such a way that we can optimize away code block at build time - * if CONFIG_HUGETLB_PAGE=n. - */ -static inline int pmd_huge(pmd_t pmd) -{ - /* - * leaf pte for huge page - */ - return !!(pmd_val(pmd) & _PAGE_PTE); -} - -static inline int pud_huge(pud_t pud) -{ - /* - * leaf pte for huge page - */ - return !!(pud_val(pud) & _PAGE_PTE); -} + (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) -static inline int pgd_huge(pgd_t pgd) +extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t); +static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t prot) { - /* - * leaf pte for huge page - */ - return !!(pgd_val(pgd) & _PAGE_PTE); + if (pfn > (PTE_RPN_MASK >> PAGE_SHIFT)) { + WARN(1, "remap_4k_pfn called with wrong pfn value\n"); + return -EINVAL; + } + return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, + __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); } -#define pgd_huge pgd_huge -#ifdef CONFIG_DEBUG_VM -extern int hugepd_ok(hugepd_t hpd); -#define is_hugepd(hpd) (hugepd_ok(hpd)) +#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ + (sizeof(unsigned long) << PMD_INDEX_SIZE)) #else -/* - * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't - * need to setup hugepage directory for them. Our pte and page directory format - * enable us to have this enabled. - */ -static inline int hugepd_ok(hugepd_t hpd) -{ - return 0; -} -#define is_hugepd(pdep) 0 -#endif /* CONFIG_DEBUG_VM */ - -#endif /* CONFIG_HUGETLB_PAGE */ +#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#endif +#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern unsigned long pmd_hugepage_update(struct mm_struct *mm, - unsigned long addr, - pmd_t *pmdp, - unsigned long clr, - unsigned long set); static inline char *get_hpte_slot_array(pmd_t *pmdp) { /* @@ -253,50 +177,35 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, * that for explicit huge pages. * */ -static inline int pmd_trans_huge(pmd_t pmd) +static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) == - (_PAGE_PTE | _PAGE_THP_HUGE)); + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + (_PAGE_PTE | H_PAGE_THP_HUGE)); } -static inline int pmd_large(pmd_t pmd) +static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b) { - return !!(pmd_val(pmd) & _PAGE_PTE); + return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t hash__pmd_mkhuge(pmd_t pmd) { - return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); -} - -#define __HAVE_ARCH_PMD_SAME -static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) -{ - return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0); -} - -static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) -{ - unsigned long old; - - if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) - return 0; - old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); - return ((old & _PAGE_ACCESSED) != 0); -} - -#define __HAVE_ARCH_PMDP_SET_WRPROTECT -static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - - if ((pmd_val(*pmdp) & _PAGE_RW) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0); + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE)); } +extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp, + unsigned long clr, unsigned long set); +extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); +extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); +extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d0ee6fcef823..f61cad3de4e6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -13,48 +13,12 @@ * We could create separate kernel read-only if we used the 3 PP bits * combinations that newer processors provide but we currently don't. */ -#define _PAGE_BIT_SWAP_TYPE 0 - -#define _PAGE_EXEC 0x00001 /* execute permission */ -#define _PAGE_RW 0x00002 /* read & write access allowed */ -#define _PAGE_READ 0x00004 /* read access allowed */ -#define _PAGE_USER 0x00008 /* page may be accessed by userspace */ -#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ -/* M (memory coherence) is always set in the HPTE, so we don't need it here */ -#define _PAGE_COHERENT 0x0 -#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ -#define _PAGE_DIRTY 0x00080 /* C: page changed */ -#define _PAGE_ACCESSED 0x00100 /* R: page referenced */ -#define _PAGE_SPECIAL 0x00400 /* software: special page */ -#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ - -#ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */ -#else -#define _PAGE_SOFT_DIRTY 0x000 -#endif - -#define _PAGE_F_GIX_SHIFT 57 -#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ -#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ -#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ -#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ -#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ - -/* - * We need to differentiate between explicit huge page and THP huge - * page, since THP huge page also need to track real subpage details - */ -#define _PAGE_THP_HUGE _PAGE_4K_PFN - -/* - * set of bits not changed in pmd_modify. - */ -#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) - +#define H_PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ +#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS +#define H_PAGE_F_GIX_SHIFT 57 +#define H_PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ +#define H_PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ #ifdef CONFIG_PPC_64K_PAGES #include <asm/book3s/64/hash-64k.h> @@ -65,29 +29,33 @@ /* * Size of EA range mapped by our pagetables. */ -#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) -#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) +#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ + H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) +#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) +/* + * only with hash we need to use the second half of pmd page table + * to store pointer to deposited pgtable_t + */ +#define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) #else -#define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE #endif /* * Define the address range of the kernel non-linear virtual area */ -#define KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) +#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* * The vmalloc space starts at the beginning of that region, and * occupies half of it on hash CPUs and a quarter of it on Book3E * (we keep a quarter for the virtual memmap) */ -#define VMALLOC_START KERN_VIRT_START -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) /* * Region IDs @@ -96,7 +64,7 @@ #define REGION_MASK (0xfUL << REGION_SHIFT) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) +#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START)) #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) #define VMEMMAP_REGION_ID (0xfUL) /* Server only */ #define USER_REGION_ID (0UL) @@ -105,381 +73,97 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs. */ -#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) +#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* CONFIG_PPC_MM_SLICES */ -/* No separate kernel read-only */ -#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ -#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) - -/* Strong Access Ordering */ -#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT) - -/* No page size encoding in the linux PTE */ -#define _PAGE_PSIZE 0 /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 #define _PTEIDX_GROUP_IX 0x7 -/* Hash table based platforms need atomic updates of the linux PTE */ -#define PTE_ATOMIC_UPDATES 1 -#define _PTE_NONE_MASK _PAGE_HPTEFLAGS -/* - * The mask convered by the RPN must be a ULL on 32-bit platforms with - * 64-bit PTEs - */ -#define PTE_RPN_MASK (((1UL << PTE_RPN_SIZE) - 1) << PTE_RPN_SHIFT) -/* - * _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) -/* - * Mask of bits returned by pte_pgprot() - */ -#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | \ - _PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY) -/* - * We define 2 sets of base prot bits, one for basic pages (ie, - * cacheable kernel and user pages) and one for non cacheable - * pages. We always set _PAGE_COHERENT when SMP is enabled or - * the processor might need it for DMA coherency. - */ -#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) -#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) - -/* Permission masks used to generate the __P and __S table, - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now (we could make write-only - * pages on BookE but we don't bother for now). Execute permission control is - * possible on platforms that define _PAGE_EXEC - * - * Note due to the way vm flags are laid out, the bits are XWR - */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ - _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER ) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER ) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) - -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_X -#define __P101 PAGE_READONLY_X -#define __P110 PAGE_COPY_X -#define __P111 PAGE_COPY_X - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_X -#define __S101 PAGE_READONLY_X -#define __S110 PAGE_SHARED_X -#define __S111 PAGE_SHARED_X - -/* Permission masks used for kernel mappings */ -#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) -#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE) -#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE | _PAGE_GUARDED) -#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) -#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) - -/* Protection used for kernel text. We want the debuggers to be able to - * set breakpoints anywhere, so don't write protect the kernel text - * on platforms where such control is possible. - */ -#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ - defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) -#define PAGE_KERNEL_TEXT PAGE_KERNEL_X -#else -#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX -#endif - -/* Make modules code happy. We don't set RO yet */ -#define PAGE_KERNEL_EXEC PAGE_KERNEL_X -#define PAGE_AGP (PAGE_KERNEL_NC) - -#define PMD_BAD_BITS (PTE_TABLE_SIZE-1) -#define PUD_BAD_BITS (PMD_TABLE_SIZE-1) +#define H_PMD_BAD_BITS (PTE_TABLE_SIZE-1) +#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ -#define pmd_bad(pmd) (pmd_val(pmd) & PMD_BAD_BITS) -#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) - -#define pud_bad(pud) (pud_val(pud) & PUD_BAD_BITS) -#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) - -/* Pointers in the page table tree are physical addresses */ -#define __pgtable_ptr_val(ptr) __pa(ptr) - -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) +#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) +static inline int hash__pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) == 0); +} extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge); extern unsigned long htab_convert_pte_flags(unsigned long pteflags); /* Atomic PTE updates */ -static inline unsigned long pte_update(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, unsigned long clr, - unsigned long set, - int huge) +static inline unsigned long hash__pte_update(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, + int huge) { - unsigned long old, tmp; + __be64 old_be, tmp_be; + unsigned long old; __asm__ __volatile__( "1: ldarx %0,0,%3 # pte_update\n\ - andi. %1,%0,%6\n\ + and. %1,%0,%6\n\ bne- 1b \n\ andc %1,%0,%4 \n\ or %1,%1,%7\n\ stdcx. %1,0,%3 \n\ bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*ptep) - : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set) + : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep) + : "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep), + "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set)) : "cc" ); /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); - if (old & _PAGE_HASHPTE) + old = be64_to_cpu(old_be); + if (old & H_PAGE_HASHPTE) hpte_need_flush(mm, addr, ptep, old, huge); return old; } -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old; - - if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) - return 0; - old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); - return (old & _PAGE_ACCESSED) != 0; -} -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ -({ \ - int __r; \ - __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ - __r; \ -}) - -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - if ((pte_val(*ptep) & _PAGE_RW) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_RW, 0, 1); -} - -/* - * We currently remove entries from the hashtable regardless of whether - * the entry was young or dirty. The generic routines only flush if the - * entry was young or dirty which is not good enough. - * - * We should be more intelligent about this but for the moment we override - * these functions and force a tlb flush unconditionally - */ -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define ptep_clear_flush_young(__vma, __address, __ptep) \ -({ \ - int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \ - __ptep); \ - __young; \ -}) - -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); - return __pte(old); -} - -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t * ptep) -{ - pte_update(mm, addr, ptep, ~0UL, 0, 0); -} - - /* Set the dirty and/or accessed bits atomically in a linux PTE, this * function doesn't need to flush the hash entry */ -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void hash__ptep_set_access_flags(pte_t *ptep, pte_t entry) { - unsigned long bits = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC | - _PAGE_SOFT_DIRTY); + __be64 old, tmp, val, mask; - unsigned long old, tmp; + mask = cpu_to_be64(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE | + _PAGE_EXEC | _PAGE_SOFT_DIRTY); + + val = pte_raw(entry) & mask; __asm__ __volatile__( "1: ldarx %0,0,%4\n\ - andi. %1,%0,%6\n\ + and. %1,%0,%6\n\ bne- 1b \n\ or %0,%3,%0\n\ stdcx. %0,0,%4\n\ bne- 1b" :"=&r" (old), "=&r" (tmp), "=m" (*ptep) - :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY) + :"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY)) :"cc"); } -static inline int pgd_bad(pgd_t pgd) -{ - return (pgd_val(pgd) == 0); -} - -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) -static inline unsigned long pgd_page_vaddr(pgd_t pgd) -{ - return (unsigned long)__va(pgd_val(pgd) & ~PGD_MASKED_BITS); -} - - -/* Generic accessors to PTE bits */ -static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} -static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } -static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } -static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } -static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } - -#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY -static inline bool pte_soft_dirty(pte_t pte) -{ - return !!(pte_val(pte) & _PAGE_SOFT_DIRTY); -} -static inline pte_t pte_mksoft_dirty(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); -} - -static inline pte_t pte_clear_soft_dirty(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY); -} -#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ - -#ifdef CONFIG_NUMA_BALANCING -/* - * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h . On powerpc, this will only - * work for user pages and always return true for kernel pages. - */ -static inline int pte_protnone(pte_t pte) -{ - return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; -} -#endif /* CONFIG_NUMA_BALANCING */ - -static inline int pte_present(pte_t pte) -{ - return !!(pte_val(pte) & _PAGE_PRESENT); -} - -/* Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - * - * Even if PTEs can be unsigned long long, a PFN is always an unsigned - * long for now. - */ -static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) -{ - return __pte((((pte_basic_t)(pfn) << PTE_RPN_SHIFT) & PTE_RPN_MASK) | - pgprot_val(pgprot)); -} - -static inline unsigned long pte_pfn(pte_t pte) -{ - return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT; -} - -/* Generic modifiers for PTE bits */ -static inline pte_t pte_wrprotect(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_RW); -} - -static inline pte_t pte_mkclean(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_DIRTY); -} - -static inline pte_t pte_mkold(pte_t pte) -{ - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); -} - -static inline pte_t pte_mkwrite(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_RW); -} - -static inline pte_t pte_mkdirty(pte_t pte) +static inline int hash__pte_same(pte_t pte_a, pte_t pte_b) { - return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + return (((pte_raw(pte_a) ^ pte_raw(pte_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0); } -static inline pte_t pte_mkyoung(pte_t pte) +static inline int hash__pte_none(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); -} - -static inline pte_t pte_mkspecial(pte_t pte) -{ - return __pte(pte_val(pte) | _PAGE_SPECIAL); -} - -static inline pte_t pte_mkhuge(pte_t pte) -{ - return pte; -} - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return (pte_val(pte) & ~H_PTE_NONE_MASK) == 0; } /* This low level function performs the actual PTE insertion @@ -487,8 +171,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * an horrible mess that I'm not going to try to clean up now but * I'm keeping it in one place rather than spread around */ -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, int percpu) +static inline void hash__set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) { /* * Anything else just stores the PTE normally. That covers all 64-bit @@ -497,53 +181,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, *ptep = pte; } -/* - * Macro to mark a page protection value as "uncacheable". - */ - -#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU) - -#define pgprot_noncached pgprot_noncached -static inline pgprot_t pgprot_noncached(pgprot_t prot) -{ - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_NO_CACHE | _PAGE_GUARDED); -} - -#define pgprot_noncached_wc pgprot_noncached_wc -static inline pgprot_t pgprot_noncached_wc(pgprot_t prot) -{ - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_NO_CACHE); -} - -#define pgprot_cached pgprot_cached -static inline pgprot_t pgprot_cached(pgprot_t prot) -{ - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_COHERENT); -} - -#define pgprot_cached_wthru pgprot_cached_wthru -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot) -{ - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_COHERENT | _PAGE_WRITETHRU); -} - -#define pgprot_cached_noncoherent pgprot_cached_noncoherent -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot) -{ - return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL); -} - -#define pgprot_writecombine pgprot_writecombine -static inline pgprot_t pgprot_writecombine(pgprot_t prot) -{ - return pgprot_noncached_wc(prot); -} - #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd); @@ -556,6 +193,14 @@ static inline void hpte_do_hugepage_flush(struct mm_struct *mm, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +extern int hash__map_kernel_page(unsigned long ea, unsigned long pa, + unsigned long flags); +extern int __meminit hash__vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys); +extern void hash__vmemmap_remove_mapping(unsigned long start, + unsigned long page_size); #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h new file mode 100644 index 000000000000..60f47649306f --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h @@ -0,0 +1,14 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H +/* + * For radix we want generic code to handle hugetlb. But then if we want + * both hash and radix to be enabled together we need to workaround the + * limitations. + */ +void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern unsigned long +radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 0cea4807e26f..290157e8d5b2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_MMU_HASH64_H_ -#define _ASM_POWERPC_MMU_HASH64_H_ +#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ +#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ /* * PowerPC64 memory management structures * @@ -78,6 +78,10 @@ #define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) #define HPTE_V_VALID ASM_CONST(0x0000000000000001) +/* + * ISA 3.0 have a different HPTE format. + */ +#define HPTE_R_3_0_SSIZE_SHIFT 58 #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) @@ -115,6 +119,7 @@ #define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ #define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */ #define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */ +#define POWER9_TLB_SETS_RADIX 128 /* # sets in POWER9 TLB Radix mode */ #ifndef __ASSEMBLY__ @@ -127,24 +132,6 @@ extern struct hash_pte *htab_address; extern unsigned long htab_size_bytes; extern unsigned long htab_hash_mask; -/* - * Page size definition - * - * shift : is the "PAGE_SHIFT" value for that page size - * sllp : is a bit mask with the value of SLB L || LP to be or'ed - * directly to a slbmte "vsid" value - * penc : is the HPTE encoding mask for the "LP" field: - * - */ -struct mmu_psize_def -{ - unsigned int shift; /* number of bits */ - int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ - unsigned int tlbiel; /* tlbiel supported for that page size */ - unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ - unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ -}; -extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; static inline int shift_to_mmu_psize(unsigned int shift) { @@ -210,11 +197,6 @@ static inline int segment_shift(int ssize) /* * The current system page and segment sizes */ -extern int mmu_linear_psize; -extern int mmu_virtual_psize; -extern int mmu_vmalloc_psize; -extern int mmu_vmemmap_psize; -extern int mmu_io_psize; extern int mmu_kernel_ssize; extern int mmu_highuser_ssize; extern u16 mmu_slb_size; @@ -247,7 +229,8 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } @@ -271,8 +254,12 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize) + int actual_psize, int ssize) { + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; + /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; @@ -476,7 +463,7 @@ extern void slb_set_size(u16 size); add rt,rt,rx /* 4 bits per slice and we have one slice per 1TB */ -#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) +#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) #ifndef __ASSEMBLY__ @@ -512,38 +499,6 @@ static inline void subpage_prot_free(struct mm_struct *mm) {} static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ -typedef unsigned long mm_context_id_t; -struct spinlock; - -typedef struct { - mm_context_id_t id; - u16 user_psize; /* page size index */ - -#ifdef CONFIG_PPC_MM_SLICES - u64 low_slices_psize; /* SLB page size encodings */ - unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; -#else - u16 sllp; /* SLB page size encoding */ -#endif - unsigned long vdso_base; -#ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; -#endif /* CONFIG_PPC_SUBPAGE_PROT */ -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard acop and cop_pid */ - unsigned long acop; /* mask of enabled coprocessor types */ - unsigned int cop_pid; /* pid value used with coprocessors */ -#endif /* CONFIG_PPC_ICSWX */ -#ifdef CONFIG_PPC_64K_PAGES - /* for 4K PTE fragment support */ - void *pte_frag; -#endif -#ifdef CONFIG_SPAPR_TCE_IOMMU - struct list_head iommu_group_mem_list; -#endif -} mm_context_t; - - #if 0 /* * The code below is equivalent to this function for arguments @@ -579,7 +534,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, /* * Bad address. We return VSID 0 for that */ - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) return 0; if (ssize == MMU_SEGSIZE_256M) @@ -613,4 +568,4 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size); #endif /* __ASSEMBLY__ */ -#endif /* _ASM_POWERPC_MMU_HASH64_H_ */ +#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h new file mode 100644 index 000000000000..5854263d4d6e --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -0,0 +1,137 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_ +#define _ASM_POWERPC_BOOK3S_64_MMU_H_ + +#ifndef __ASSEMBLY__ +/* + * Page size definition + * + * shift : is the "PAGE_SHIFT" value for that page size + * sllp : is a bit mask with the value of SLB L || LP to be or'ed + * directly to a slbmte "vsid" value + * penc : is the HPTE encoding mask for the "LP" field: + * + */ +struct mmu_psize_def { + unsigned int shift; /* number of bits */ + int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ + unsigned int tlbiel; /* tlbiel supported for that page size */ + unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ + union { + unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ + unsigned long ap; /* Ap encoding used by PowerISA 3.0 */ + }; +}; +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +#define radix_enabled() mmu_has_feature(MMU_FTR_RADIX) + +#endif /* __ASSEMBLY__ */ + +/* 64-bit classic hash table MMU */ +#include <asm/book3s/64/mmu-hash.h> + +#ifndef __ASSEMBLY__ +/* + * ISA 3.0 partiton and process table entry format + */ +struct prtb_entry { + __be64 prtb0; + __be64 prtb1; +}; +extern struct prtb_entry *process_tb; + +struct patb_entry { + __be64 patb0; + __be64 patb1; +}; +extern struct patb_entry *partition_tb; + +#define PATB_HR (1UL << 63) +#define PATB_GR (1UL << 63) +#define RPDB_MASK 0x0ffffffffffff00fUL +#define RPDB_SHIFT (1UL << 8) +/* + * Limit process table to PAGE_SIZE table. This + * also limit the max pid we can support. + * MAX_USER_CONTEXT * 16 bytes of space. + */ +#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4) +/* + * Power9 currently only support 64K partition table size. + */ +#define PATB_SIZE_SHIFT 16 + +typedef unsigned long mm_context_id_t; +struct spinlock; + +typedef struct { + mm_context_id_t id; + u16 user_psize; /* page size index */ + +#ifdef CONFIG_PPC_MM_SLICES + u64 low_slices_psize; /* SLB page size encodings */ + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; +#else + u16 sllp; /* SLB page size encoding */ +#endif + unsigned long vdso_base; +#ifdef CONFIG_PPC_SUBPAGE_PROT + struct subpage_prot_table spt; +#endif /* CONFIG_PPC_SUBPAGE_PROT */ +#ifdef CONFIG_PPC_ICSWX + struct spinlock *cop_lockp; /* guard acop and cop_pid */ + unsigned long acop; /* mask of enabled coprocessor types */ + unsigned int cop_pid; /* pid value used with coprocessors */ +#endif /* CONFIG_PPC_ICSWX */ +#ifdef CONFIG_PPC_64K_PAGES + /* for 4K PTE fragment support */ + void *pte_frag; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + struct list_head iommu_group_mem_list; +#endif +} mm_context_t; + +/* + * The current system page and segment sizes + */ +extern int mmu_linear_psize; +extern int mmu_virtual_psize; +extern int mmu_vmalloc_psize; +extern int mmu_vmemmap_psize; +extern int mmu_io_psize; + +/* MMU initialization */ +extern void radix_init_native(void); +extern void hash__early_init_mmu(void); +extern void radix__early_init_mmu(void); +static inline void early_init_mmu(void) +{ + if (radix_enabled()) + return radix__early_init_mmu(); + return hash__early_init_mmu(); +} +extern void hash__early_init_mmu_secondary(void); +extern void radix__early_init_mmu_secondary(void); +static inline void early_init_mmu_secondary(void) +{ + if (radix_enabled()) + return radix__early_init_mmu_secondary(); + return hash__early_init_mmu_secondary(); +} + +extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); +extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); +static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + if (radix_enabled()) + return radix__setup_initial_memory_limit(first_memblock_base, + first_memblock_size); + return hash__setup_initial_memory_limit(first_memblock_base, + first_memblock_size); +} +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h new file mode 100644 index 000000000000..488279edb1f0 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -0,0 +1,207 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_PGALLOC_H +#define _ASM_POWERPC_BOOK3S_64_PGALLOC_H +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/cpumask.h> +#include <linux/percpu.h> + +struct vmemmap_backing { + struct vmemmap_backing *list; + unsigned long phys; + unsigned long virt_addr; +}; +extern struct vmemmap_backing *vmemmap_list; + +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf + +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO + +extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); +extern void pte_fragment_free(unsigned long *, int); +extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); +#ifdef CONFIG_SMP +extern void __tlb_remove_table(void *_table); +#endif + +static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_64K_PAGES + return (pgd_t *)__get_free_page(PGALLOC_GFP); +#else + struct page *page; + page = alloc_pages(PGALLOC_GFP, 4); + if (!page) + return NULL; + return (pgd_t *) page_address(page); +#endif +} + +static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_PPC_64K_PAGES + free_page((unsigned long)pgd); +#else + free_pages((unsigned long)pgd, 4); +#endif +} + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + if (radix_enabled()) + return radix__pgd_alloc(mm); + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (radix_enabled()) + return radix__pgd_free(mm, pgd); + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS); +} + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud); +} + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS); +} + +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) +{ + pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); +} + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); +} + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) +{ + return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); +} + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, + pte_t *pte) +{ + pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, + pgtable_t pte_page) +{ + pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS); +} + +static inline pgtable_t pmd_pgtable(pmd_t pmd) +{ + return (pgtable_t)pmd_page_vaddr(pmd); +} + +#ifdef CONFIG_PPC_4K_PAGES +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + struct page *page; + pte_t *pte; + + pte = pte_alloc_one_kernel(mm, address); + if (!pte) + return NULL; + page = virt_to_page(pte); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + return pte; +} +#else /* if CONFIG_PPC_64K_PAGES */ + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + return (pte_t *)pte_fragment_alloc(mm, address, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + return (pgtable_t)pte_fragment_alloc(mm, address, 0); +} +#endif + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + pte_fragment_free((unsigned long *)pte, 1); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) +{ + pte_fragment_free((unsigned long *)ptepage, 0); +} + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +{ + tlb_flush_pgtable(tlb, address); + pgtable_free_tlb(tlb, table, 0); +} + +#define check_pgt_cache() do { } while (0) + +#endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h new file mode 100644 index 000000000000..71e9abced493 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h @@ -0,0 +1,53 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H +#define _ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H +/* + * hash 4k can't share hugetlb and also doesn't support THP + */ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +static inline int pmd_huge(pmd_t pmd) +{ + /* + * leaf pte for huge page + */ + if (radix_enabled()) + return !!(pmd_val(pmd) & _PAGE_PTE); + return 0; +} + +static inline int pud_huge(pud_t pud) +{ + /* + * leaf pte for huge page + */ + if (radix_enabled()) + return !!(pud_val(pud) & _PAGE_PTE); + return 0; +} + +static inline int pgd_huge(pgd_t pgd) +{ + /* + * leaf pte for huge page + */ + if (radix_enabled()) + return !!(pgd_val(pgd) & _PAGE_PTE); + return 0; +} +#define pgd_huge pgd_huge +/* + * With radix , we have hugepage ptes in the pud and pmd entries. We don't + * need to setup hugepage directory for them. Our pte and page directory format + * enable us to have this enabled. + */ +static inline int hugepd_ok(hugepd_t hpd) +{ + if (radix_enabled()) + return 0; + return hash__hugepd_ok(hpd); +} +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#endif /* CONFIG_HUGETLB_PAGE */ +#endif /* __ASSEMBLY__ */ + +#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h new file mode 100644 index 000000000000..cb2d0a5fa3f8 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -0,0 +1,64 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H +#define _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +/* + * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have + * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. + */ +static inline int pmd_huge(pmd_t pmd) +{ + /* + * leaf pte for huge page + */ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + +static inline int pud_huge(pud_t pud) +{ + /* + * leaf pte for huge page + */ + return !!(pud_val(pud) & _PAGE_PTE); +} + +static inline int pgd_huge(pgd_t pgd) +{ + /* + * leaf pte for huge page + */ + return !!(pgd_val(pgd) & _PAGE_PTE); +} +#define pgd_huge pgd_huge + +#ifdef CONFIG_DEBUG_VM +extern int hugepd_ok(hugepd_t hpd); +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#else +/* + * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't + * need to setup hugepage directory for them. Our pte and page directory format + * enable us to have this enabled. + */ +static inline int hugepd_ok(hugepd_t hpd) +{ + return 0; +} +#define is_hugepd(pdep) 0 +#endif /* CONFIG_DEBUG_VM */ + +#endif /* CONFIG_HUGETLB_PAGE */ + +static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t prot) +{ + if (radix_enabled()) + BUG(); + return hash__remap_4k_pfn(vma, addr, pfn, prot); +} +#endif /* __ASSEMBLY__ */ +#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 77d3ce05798e..88a5ecaa157b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,13 +1,247 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ + +/* + * Common bits between hash and Radix page table + */ +#define _PAGE_BIT_SWAP_TYPE 0 + +#define _PAGE_EXEC 0x00001 /* execute permission */ +#define _PAGE_WRITE 0x00002 /* write access allowed */ +#define _PAGE_READ 0x00004 /* read access allowed */ +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) +#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ +#define _PAGE_SAO 0x00010 /* Strong access order */ +#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ +#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ +#define _PAGE_DIRTY 0x00080 /* C: page changed */ +#define _PAGE_ACCESSED 0x00100 /* R: page referenced */ /* - * This file contains the functions and defines necessary to modify and use - * the ppc64 hashed page table. + * Software bits */ +#define _RPAGE_SW0 0x2000000000000000UL +#define _RPAGE_SW1 0x00800 +#define _RPAGE_SW2 0x00400 +#define _RPAGE_SW3 0x00200 +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ +#else +#define _PAGE_SOFT_DIRTY 0x00000 +#endif +#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ + + +#define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ +#define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ +/* + * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE + * Instead of fixing all of them, add an alternate define which + * maps CI pte mapping. + */ +#define _PAGE_NO_CACHE _PAGE_TOLERANT +/* + * We support 57 bit real address in pte. Clear everything above 57, and + * every thing below PAGE_SHIFT; + */ +#define PTE_RPN_MASK (((1UL << 57) - 1) & (PAGE_MASK)) +/* + * set of bits not changed in pmd_modify. Even though we have hash specific bits + * in here, on radix we expect them to be zero. + */ +#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ + _PAGE_SOFT_DIRTY) +/* + * user access blocked by key + */ +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) +#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ + _PAGE_RW | _PAGE_EXEC) +/* + * No page size encoding in the linux PTE + */ +#define _PAGE_PSIZE 0 +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ + _PAGE_SOFT_DIRTY) +/* + * Mask of bits returned by pte_pgprot() + */ +#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ + H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ + _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ + _PAGE_SOFT_DIRTY) +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) +#define _PAGE_BASE (_PAGE_BASE_NC) + +/* Permission masks used to generate the __P and __S table, + * + * Note:__pgprot is defined in arch/powerpc/include/asm/page.h + * + * Write permissions imply read permissions for now (we could make write-only + * pages on BookE but we don't bother for now). Execute permission control is + * possible on platforms that define _PAGE_EXEC + * + * Note due to the way vm flags are laid out, the bits are XWR + */ +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) + +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_X +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY_X +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_X +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED_X +#define __S111 PAGE_SHARED_X + +/* Permission masks used for kernel mappings */ +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) +#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_TOLERANT) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_NON_IDEMPOTENT) +#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) + +/* + * Protection used for kernel text. We want the debuggers to be able to + * set breakpoints anywhere, so don't write protect the kernel text + * on platforms where such control is possible. + */ +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \ + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) +#define PAGE_KERNEL_TEXT PAGE_KERNEL_X +#else +#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX +#endif + +/* Make modules code happy. We don't set RO yet */ +#define PAGE_KERNEL_EXEC PAGE_KERNEL_X +#define PAGE_AGP (PAGE_KERNEL_NC) + +#ifndef __ASSEMBLY__ +/* + * page table defines + */ +extern unsigned long __pte_index_size; +extern unsigned long __pmd_index_size; +extern unsigned long __pud_index_size; +extern unsigned long __pgd_index_size; +extern unsigned long __pmd_cache_index; +#define PTE_INDEX_SIZE __pte_index_size +#define PMD_INDEX_SIZE __pmd_index_size +#define PUD_INDEX_SIZE __pud_index_size +#define PGD_INDEX_SIZE __pgd_index_size +#define PMD_CACHE_INDEX __pmd_cache_index +/* + * Because of use of pte fragments and THP, size of page table + * are not always derived out of index size above. + */ +extern unsigned long __pte_table_size; +extern unsigned long __pmd_table_size; +extern unsigned long __pud_table_size; +extern unsigned long __pgd_table_size; +#define PTE_TABLE_SIZE __pte_table_size +#define PMD_TABLE_SIZE __pmd_table_size +#define PUD_TABLE_SIZE __pud_table_size +#define PGD_TABLE_SIZE __pgd_table_size + +extern unsigned long __pmd_val_bits; +extern unsigned long __pud_val_bits; +extern unsigned long __pgd_val_bits; +#define PMD_VAL_BITS __pmd_val_bits +#define PUD_VAL_BITS __pud_val_bits +#define PGD_VAL_BITS __pgd_val_bits + +extern unsigned long __pte_frag_nr; +#define PTE_FRAG_NR __pte_frag_nr +extern unsigned long __pte_frag_size_shift; +#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift +#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) +/* + * Pgtable size used by swapper, init in asm code + */ +#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0xc0000000000000ffUL + +extern unsigned long __vmalloc_start; +extern unsigned long __vmalloc_end; +#define VMALLOC_START __vmalloc_start +#define VMALLOC_END __vmalloc_end + +extern unsigned long __kernel_virt_start; +extern unsigned long __kernel_virt_size; +#define KERN_VIRT_START __kernel_virt_start +#define KERN_VIRT_SIZE __kernel_virt_size +extern struct page *vmemmap; +extern unsigned long ioremap_bot; +#endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> -#include <asm/barrier.h> +#include <asm/book3s/64/radix.h> +#ifdef CONFIG_PPC_64K_PAGES +#include <asm/book3s/64/pgtable-64k.h> +#else +#include <asm/book3s/64/pgtable-4k.h> +#endif + +#include <asm/barrier.h> /* * The second half of the kernel virtual space is used for IO mappings, * it's itself carved into the PIO region (ISA and PHB IO space) and @@ -26,8 +260,6 @@ #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) -#define vmemmap ((struct page *)VMEMMAP_BASE) - /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP @@ -45,7 +277,7 @@ #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT) +#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ do { \ @@ -62,6 +294,327 @@ #endif /* __real_pte */ +static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, int huge) +{ + if (radix_enabled()) + return radix__pte_update(mm, addr, ptep, clr, set, huge); + return hash__pte_update(mm, addr, ptep, clr, set, huge); +} +/* + * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update. + * We currently remove entries from the hashtable regardless of whether + * the entry was young or dirty. + * + * We should be more intelligent about this but for the moment we override + * these functions and force a tlb flush unconditionally + * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same + * function for both hash and radix. + */ +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old; + + if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) + return 0; + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); + return (old & _PAGE_ACCESSED) != 0; +} + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define ptep_test_and_clear_young(__vma, __addr, __ptep) \ +({ \ + int __r; \ + __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \ + __r; \ +}) + +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + + if ((pte_val(*ptep) & _PAGE_WRITE) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if ((pte_val(*ptep) & _PAGE_WRITE) == 0) + return; + + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); +} + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0); + return __pte(old); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t * ptep) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 0); +} +static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_WRITE);} +static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } +static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } +static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } +static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline bool pte_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SOFT_DIRTY); +} +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY); +} +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +#ifdef CONFIG_NUMA_BALANCING +/* + * These work without NUMA balancing but the kernel does not care. See the + * comment in include/asm-generic/pgtable.h . On powerpc, this will only + * work for user pages and always return true for kernel pages. + */ +static inline int pte_protnone(pte_t pte) +{ + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) == + (_PAGE_PRESENT | _PAGE_PRIVILEGED); +} +#endif /* CONFIG_NUMA_BALANCING */ + +static inline int pte_present(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_PRESENT); +} +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * Even if PTEs can be unsigned long long, a PFN is always an unsigned + * long for now. + */ +static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) +{ + return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) | + pgprot_val(pgprot)); +} + +static inline unsigned long pte_pfn(pte_t pte) +{ + return (pte_val(pte) & PTE_RPN_MASK) >> PAGE_SHIFT; +} + +/* Generic modifiers for PTE bits */ +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_WRITE); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + /* + * write implies read, hence set both + */ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPECIAL); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + /* FIXME!! check whether this need to be a conditional */ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +static inline bool pte_user(pte_t pte) +{ + return !(pte_val(pte) & _PAGE_PRIVILEGED); +} + +/* Encode and de-code a swap entry */ +#define MAX_SWAPFILES_CHECK() do { \ + BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ + /* \ + * Don't have overlapping bits with _PAGE_HPTEFLAGS \ + * We filter HPTEFLAGS on set_pte. \ + */ \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ + } while (0) +/* + * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; + */ +#define SWP_TYPE_BITS 5 +#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ + & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PAGE_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)}) +/* + * swp_entry_t must be independent of pte bits. We build a swp_entry_t from + * swap type and offset we get from swap and convert that to pte to find a + * matching pte in linux page table. + * Clear bits not found in swap entries here. + */ +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE }) +#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE) + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE)) +#else +#define _PAGE_SWP_SOFT_DIRTY 0UL +#endif /* CONFIG_MEM_SOFT_DIRTY */ + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); +} +static inline bool pte_swp_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY); +} +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); +} +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + +static inline bool check_pte_access(unsigned long access, unsigned long ptev) +{ + /* + * This check for _PAGE_RWX and _PAGE_PRESENT bits + */ + if (access & ~ptev) + return false; + /* + * This check for access to privilege space + */ + if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED)) + return false; + + return true; +} +/* + * Generic functions with hash/radix callbacks + */ + +static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +{ + if (radix_enabled()) + return radix__ptep_set_access_flags(ptep, entry); + return hash__ptep_set_access_flags(ptep, entry); +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + if (radix_enabled()) + return radix__pte_same(pte_a, pte_b); + return hash__pte_same(pte_a, pte_b); +} + +static inline int pte_none(pte_t pte) +{ + if (radix_enabled()) + return radix__pte_none(pte); + return hash__pte_none(pte); +} + +static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) +{ + if (radix_enabled()) + return radix__set_pte_at(mm, addr, ptep, pte, percpu); + return hash__set_pte_at(mm, addr, ptep, pte, percpu); +} + +#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) + +#define pgprot_noncached pgprot_noncached +static inline pgprot_t pgprot_noncached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_NON_IDEMPOTENT); +} + +#define pgprot_noncached_wc pgprot_noncached_wc +static inline pgprot_t pgprot_noncached_wc(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | + _PAGE_TOLERANT); +} + +#define pgprot_cached pgprot_cached +static inline pgprot_t pgprot_cached(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL)); +} + +#define pgprot_writecombine pgprot_writecombine +static inline pgprot_t pgprot_writecombine(pgprot_t prot) +{ + return pgprot_noncached_wc(prot); +} +/* + * check a pte mapping have cache inhibited property + */ +static inline bool pte_ci(pte_t pte) +{ + unsigned long pte_v = pte_val(pte); + + if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) || + ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)) + return true; + return false; +} + static inline void pmd_set(pmd_t *pmdp, unsigned long val) { *pmdp = __pmd(val); @@ -75,6 +628,13 @@ static inline void pmd_clear(pmd_t *pmdp) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_present(pmd) (!pmd_none(pmd)) +static inline int pmd_bad(pmd_t pmd) +{ + if (radix_enabled()) + return radix__pmd_bad(pmd); + return hash__pmd_bad(pmd); +} + static inline void pud_set(pud_t *pudp, unsigned long val) { *pudp = __pud(val); @@ -100,6 +660,15 @@ static inline pud_t pte_pud(pte_t pte) return __pud(pte_val(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) + +static inline int pud_bad(pud_t pud) +{ + if (radix_enabled()) + return radix__pud_bad(pud); + return hash__pud_bad(pud); +} + + #define pgd_write(pgd) pte_write(pgd_pte(pgd)) static inline void pgd_set(pgd_t *pgdp, unsigned long val) { @@ -124,8 +693,27 @@ static inline pgd_t pte_pgd(pte_t pte) return __pgd(pte_val(pte)); } +static inline int pgd_bad(pgd_t pgd) +{ + if (radix_enabled()) + return radix__pgd_bad(pgd); + return hash__pgd_bad(pgd); +} + extern struct page *pgd_page(pgd_t pgd); +/* Pointers in the page table tree are physical addresses */ +#define __pgtable_ptr_val(ptr) __pa(ptr) + +#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) +#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) + +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) +#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) +#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) + /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. @@ -156,73 +744,42 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -/* Encode and de-code a swap entry */ -#define MAX_SWAPFILES_CHECK() do { \ - BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ - /* \ - * Don't have overlapping bits with _PAGE_HPTEFLAGS \ - * We filter HPTEFLAGS on set_pte. \ - */ \ - BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ - BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ - } while (0) -/* - * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; - */ -#define SWP_TYPE_BITS 5 -#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ - & ((1UL << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PTE_RPN_SHIFT) -#define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << _PAGE_BIT_SWAP_TYPE) \ - | (((offset) << PTE_RPN_SHIFT) & PTE_RPN_MASK)}) -/* - * swp_entry_t must be independent of pte bits. We build a swp_entry_t from - * swap type and offset we get from swap and convert that to pte to find a - * matching pte in linux page table. - * Clear bits not found in swap entries here. - */ -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE }) -#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE) - -#ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE)) -#else -#define _PAGE_SWP_SOFT_DIRTY 0UL -#endif /* CONFIG_MEM_SOFT_DIRTY */ +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); +void pgtable_cache_init(void); -#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY -static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +static inline int map_kernel_page(unsigned long ea, unsigned long pa, + unsigned long flags) { - return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); + if (radix_enabled()) { +#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM) + unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift; + WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE"); +#endif + return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE); + } + return hash__map_kernel_page(ea, pa, flags); } -static inline bool pte_swp_soft_dirty(pte_t pte) + +static inline int __meminit vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys) { - return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY); + if (radix_enabled()) + return radix__vmemmap_create_mapping(start, page_size, phys); + return hash__vmemmap_create_mapping(start, page_size, phys); } -static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) + +#ifdef CONFIG_MEMORY_HOTPLUG +static inline void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) { - return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); + if (radix_enabled()) + return radix__vmemmap_remove_mapping(start, page_size); + return hash__vmemmap_remove_mapping(start, page_size); } -#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ - -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); - +#endif struct page *realmode_pfn_to_page(unsigned long pfn); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); -extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); -extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); -extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd); -extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd); -extern int has_transparent_hugepage(void); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - - static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); @@ -237,7 +794,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) { return (pte_t *)pmd; } - #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) @@ -264,9 +820,87 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); +extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); +extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); +extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd); +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); +extern int hash__has_transparent_hugepage(void); +static inline int has_transparent_hugepage(void) +{ + if (radix_enabled()) + return radix__has_transparent_hugepage(); + return hash__has_transparent_hugepage(); +} +#define has_transparent_hugepage has_transparent_hugepage + +static inline unsigned long +pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, + unsigned long clr, unsigned long set) +{ + if (radix_enabled()) + return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set); + return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set); +} + +static inline int pmd_large(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); +} +/* + * For radix we should always find H_PAGE_HASHPTE zero. Hence + * the below will work for radix too + */ +static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + unsigned long old; + + if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) + return 0; + old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); + return ((old & _PAGE_ACCESSED) != 0); +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + + if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0) + return; + + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); +} + +static inline int pmd_trans_huge(pmd_t pmd) +{ + if (radix_enabled()) + return radix__pmd_trans_huge(pmd); + return hash__pmd_trans_huge(pmd); +} + +#define __HAVE_ARCH_PMD_SAME +static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) +{ + if (radix_enabled()) + return radix__pmd_same(pmd_a, pmd_b); + return hash__pmd_same(pmd_a, pmd_b); +} + static inline pmd_t pmd_mkhuge(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE)); + if (radix_enabled()) + return radix__pmd_mkhuge(pmd); + return hash__pmd_mkhuge(pmd); } #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS @@ -277,37 +911,63 @@ extern int pmdp_set_access_flags(struct vm_area_struct *vma, #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR -extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp); +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + if (radix_enabled()) + return radix__pmdp_huge_get_and_clear(mm, addr, pmdp); + return hash__pmdp_huge_get_and_clear(mm, addr, pmdp); +} -extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); +static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + if (radix_enabled()) + return radix__pmdp_collapse_flush(vma, address, pmdp); + return hash__pmdp_collapse_flush(vma, address, pmdp); +} #define pmdp_collapse_flush pmdp_collapse_flush #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); +static inline void pgtable_trans_huge_deposit(struct mm_struct *mm, + pmd_t *pmdp, pgtable_t pgtable) +{ + if (radix_enabled()) + return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable); + return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable); +} + #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, + pmd_t *pmdp) +{ + if (radix_enabled()) + return radix__pgtable_trans_huge_withdraw(mm, pmdp); + return hash__pgtable_trans_huge_withdraw(mm, pmdp); +} #define __HAVE_ARCH_PMDP_INVALIDATE extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -extern void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); +static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + if (radix_enabled()) + return radix__pmdp_huge_split_prepare(vma, address, pmdp); + return hash__pmdp_huge_split_prepare(vma, address, pmdp); +} #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, struct spinlock *old_pmd_ptl) { + if (radix_enabled()) + return false; /* * Archs like ppc64 use pgtable to store per pmd * specific information. So when we switch the pmd, @@ -315,5 +975,6 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h new file mode 100644 index 000000000000..7c3b1fe1619e --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -0,0 +1,12 @@ +#ifndef _ASM_POWERPC_PGTABLE_RADIX_4K_H +#define _ASM_POWERPC_PGTABLE_RADIX_4K_H + +/* + * For 4K page size supported index is 13/9/9/9 + */ +#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */ +#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ +#define RADIX_PUD_INDEX_SIZE 9 +#define RADIX_PGD_INDEX_SIZE 13 + +#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h new file mode 100644 index 000000000000..82dc355f0b45 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h @@ -0,0 +1,12 @@ +#ifndef _ASM_POWERPC_PGTABLE_RADIX_64K_H +#define _ASM_POWERPC_PGTABLE_RADIX_64K_H + +/* + * For 64K page size supported index is 13/9/9/5 + */ +#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */ +#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ +#define RADIX_PUD_INDEX_SIZE 9 +#define RADIX_PGD_INDEX_SIZE 13 + +#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h new file mode 100644 index 000000000000..937d4e247ac3 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -0,0 +1,232 @@ +#ifndef _ASM_POWERPC_PGTABLE_RADIX_H +#define _ASM_POWERPC_PGTABLE_RADIX_H + +#ifndef __ASSEMBLY__ +#include <asm/cmpxchg.h> +#endif + +#ifdef CONFIG_PPC_64K_PAGES +#include <asm/book3s/64/radix-64k.h> +#else +#include <asm/book3s/64/radix-4k.h> +#endif + +/* An empty PTE can still have a R or C writeback */ +#define RADIX_PTE_NONE_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) + +/* Bits to set in a RPMD/RPUD/RPGD */ +#define RADIX_PMD_VAL_BITS (0x8000000000000000UL | RADIX_PTE_INDEX_SIZE) +#define RADIX_PUD_VAL_BITS (0x8000000000000000UL | RADIX_PMD_INDEX_SIZE) +#define RADIX_PGD_VAL_BITS (0x8000000000000000UL | RADIX_PUD_INDEX_SIZE) + +/* Don't have anything in the reserved bits and leaf bits */ +#define RADIX_PMD_BAD_BITS 0x60000000000000e0UL +#define RADIX_PUD_BAD_BITS 0x60000000000000e0UL +#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL + +/* + * Size of EA range mapped by our pagetables. + */ +#define RADIX_PGTABLE_EADDR_SIZE (RADIX_PTE_INDEX_SIZE + RADIX_PMD_INDEX_SIZE + \ + RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT) +#define RADIX_PGTABLE_RANGE (ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE) + +/* + * We support 52 bit address space, Use top bit for kernel + * virtual mapping. Also make sure kernel fit in the top + * quadrant. + * + * +------------------+ + * +------------------+ Kernel virtual map (0xc008000000000000) + * | | + * | | + * | | + * 0b11......+------------------+ Kernel linear map (0xc....) + * | | + * | 2 quadrant | + * | | + * 0b10......+------------------+ + * | | + * | 1 quadrant | + * | | + * 0b01......+------------------+ + * | | + * | 0 quadrant | + * | | + * 0b00......+------------------+ + * + * + * 3rd quadrant expanded: + * +------------------------------+ + * | | + * | | + * | | + * +------------------------------+ Kernel IO map end (0xc010000000000000) + * | | + * | | + * | 1/2 of virtual map | + * | | + * | | + * +------------------------------+ Kernel IO map start + * | | + * | 1/4 of virtual map | + * | | + * +------------------------------+ Kernel vmemap start + * | | + * | 1/4 of virtual map | + * | | + * +------------------------------+ Kernel virt start (0xc008000000000000) + * | | + * | | + * | | + * +------------------------------+ Kernel linear (0xc.....) + */ + +#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) +#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000) + +/* + * The vmalloc space starts at the beginning of that region, and + * occupies a quarter of it on radix config. + * (we keep a quarter for the virtual memmap) + */ +#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START +#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) +#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) +/* + * Defines the address of the vmemap area, in its own region on + * hash table CPUs. + */ +#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) + +#ifndef __ASSEMBLY__ +#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) +#define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE) +#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE) +#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE) + +static inline unsigned long radix__pte_update(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, + int huge) +{ + pte_t pte; + unsigned long old_pte, new_pte; + + do { + pte = READ_ONCE(*ptep); + old_pte = pte_val(pte); + new_pte = (old_pte | set) & ~clr; + + } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); + + /* We already do a sync in cmpxchg, is ptesync needed ?*/ + asm volatile("ptesync" : : : "memory"); + /* huge pages use the old page table lock */ + if (!huge) + assert_pte_locked(mm, addr); + + return old_pte; +} + +/* + * Set the dirty and/or accessed bits atomically in a linux PTE, this + * function doesn't need to invalidate tlb. + */ +static inline void radix__ptep_set_access_flags(pte_t *ptep, pte_t entry) +{ + pte_t pte; + unsigned long old_pte, new_pte; + unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | + _PAGE_RW | _PAGE_EXEC); + do { + pte = READ_ONCE(*ptep); + old_pte = pte_val(pte); + new_pte = old_pte | set; + + } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); + + /* We already do a sync in cmpxchg, is ptesync needed ?*/ + asm volatile("ptesync" : : : "memory"); +} + +static inline int radix__pte_same(pte_t pte_a, pte_t pte_b) +{ + return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0); +} + +static inline int radix__pte_none(pte_t pte) +{ + return (pte_val(pte) & ~RADIX_PTE_NONE_MASK) == 0; +} + +static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, int percpu) +{ + *ptep = pte; + asm volatile("ptesync" : : : "memory"); +} + +static inline int radix__pmd_bad(pmd_t pmd) +{ + return !!(pmd_val(pmd) & RADIX_PMD_BAD_BITS); +} + +static inline int radix__pmd_same(pmd_t pmd_a, pmd_t pmd_b) +{ + return ((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) == 0); +} + +static inline int radix__pud_bad(pud_t pud) +{ + return !!(pud_val(pud) & RADIX_PUD_BAD_BITS); +} + + +static inline int radix__pgd_bad(pgd_t pgd) +{ + return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +static inline int radix__pmd_trans_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PTE); +} + +static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | _PAGE_PTE); +} +static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + /* Nothing to do for radix. */ + return; +} + +extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, unsigned long clr, + unsigned long set); +extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); +extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); +extern int radix__has_transparent_hugepage(void); +#endif + +extern int __meminit radix__vmemmap_create_mapping(unsigned long start, + unsigned long page_size, + unsigned long phys); +extern void radix__vmemmap_remove_mapping(unsigned long start, + unsigned long page_size); + +extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, unsigned int psz); +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 1b753f96b374..f12ddf5e8de5 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -1,8 +1,6 @@ #ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H #define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H -#define MMU_NO_CONTEXT 0 - /* * TLB flushing for 64-bit hash-MMU CPUs */ @@ -29,14 +27,21 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch; + if (radix_enabled()) + return; + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch; + + if (radix_enabled()) + return; + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); @@ -52,40 +57,42 @@ extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, unsigned long flags); - -static inline void local_flush_tlb_mm(struct mm_struct *mm) +static inline void hash__local_flush_tlb_mm(struct mm_struct *mm) { } -static inline void flush_tlb_mm(struct mm_struct *mm) +static inline void hash__flush_tlb_mm(struct mm_struct *mm) { } -static inline void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) +static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) { } -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) +static inline void hash__flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) { } -static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, - unsigned long vmaddr) +static inline void hash__flush_tlb_page_nohash(struct vm_area_struct *vma, + unsigned long vmaddr) { } -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void hash__flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { } -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) +static inline void hash__flush_tlb_kernel_range(unsigned long start, + unsigned long end) { } + +struct mmu_gather; +extern void hash__tlb_flush(struct mmu_gather *tlb); /* Private function for use by PCI IO mapping code */ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, unsigned long end); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h new file mode 100644 index 000000000000..13ef38828dfe --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -0,0 +1,33 @@ +#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H +#define _ASM_POWERPC_TLBFLUSH_RADIX_H + +struct vm_area_struct; +struct mm_struct; +struct mmu_gather; + +static inline int mmu_get_ap(int psize) +{ + return mmu_psize_defs[psize].ap; +} + +extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end); + +extern void radix__local_flush_tlb_mm(struct mm_struct *mm); +extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + unsigned long ap, int nid); +extern void radix__tlb_flush(struct mmu_gather *tlb); +#ifdef CONFIG_SMP +extern void radix__flush_tlb_mm(struct mm_struct *mm); +extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, + unsigned long ap, int nid); +#else +#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) +#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) +#define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#endif + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h new file mode 100644 index 000000000000..d98424ae356c --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -0,0 +1,76 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H +#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H + +#define MMU_NO_CONTEXT ~0UL + + +#include <asm/book3s/64/tlbflush-hash.h> +#include <asm/book3s/64/tlbflush-radix.h> + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (radix_enabled()) + return radix__flush_tlb_range(vma, start, end); + return hash__flush_tlb_range(vma, start, end); +} + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + if (radix_enabled()) + return radix__flush_tlb_kernel_range(start, end); + return hash__flush_tlb_kernel_range(start, end); +} + +static inline void local_flush_tlb_mm(struct mm_struct *mm) +{ + if (radix_enabled()) + return radix__local_flush_tlb_mm(mm); + return hash__local_flush_tlb_mm(mm); +} + +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__local_flush_tlb_page(vma, vmaddr); + return hash__local_flush_tlb_page(vma, vmaddr); +} + +static inline void flush_tlb_page_nohash(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__flush_tlb_page(vma, vmaddr); + return hash__flush_tlb_page_nohash(vma, vmaddr); +} + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (radix_enabled()) + return radix__tlb_flush(tlb); + return hash__tlb_flush(tlb); +} + +#ifdef CONFIG_SMP +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (radix_enabled()) + return radix__flush_tlb_mm(mm); + return hash__flush_tlb_mm(mm); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__flush_tlb_page(vma, vmaddr); + return hash__flush_tlb_page(vma, vmaddr); +} +#else +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) +#endif /* CONFIG_SMP */ + +#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h new file mode 100644 index 000000000000..54f591e9572e --- /dev/null +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -0,0 +1,19 @@ +#ifndef _ASM_POWERPC_BOOK3S_PGALLOC_H +#define _ASM_POWERPC_BOOK3S_PGALLOC_H + +#include <linux/mm.h> + +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +static inline void tlb_flush_pgtable(struct mmu_gather *tlb, + unsigned long address) +{ + +} + +#ifdef CONFIG_PPC64 +#include <asm/book3s/64/pgalloc.h> +#else +#include <asm/book3s/32/pgalloc.h> +#endif + +#endif /* _ASM_POWERPC_BOOK3S_PGALLOC_H */ |