From 2e5d9c857d4e6c9e7b7d8c8c86a68a7842d213d6 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com"
Date: Tue, 18 Mar 2008 17:00:14 -0700
Subject: x86: PAT infrastructure patch

Sets up the pat_init() infrastructure. The PAT MSR has the following
settings:

PAT
|PCD
||PWT
|||
000 WB   _PAGE_CACHE_WB
001 WC   _PAGE_CACHE_WC
010 UC-  _PAGE_CACHE_UC_MINUS
011 UC   _PAGE_CACHE_UC

We are effectively changing WT from the boot-time setting to WC.
UC_MINUS is used to provide backward compatibility to existing /dev/mem
users (X).

reserve_memtype and free_memtype are new interfaces for maintaining
alias-free mappings. They are currently implemented in a simple way with
a linked list and are not optimized. reserve and free track the
effective memory type, as a result of the PAT and MTRR settings, rather
than what is actually requested in PAT.

pat_init piggybacks on mtrr_init, as the rules for setting up PAT and
MTRR are the same.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Suresh Siddha
Signed-off-by: Ingo Molnar
---
 include/asm-x86/pgtable.h | 6 ++++++
 1 file changed, 6 insertions(+)
(limited to 'include/asm-x86/pgtable.h')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 9cf472aeb9ce..ca6deb3de7c0 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -57,6 +57,12 @@
 #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
+#define _PAGE_CACHE_WB (0)
+#define _PAGE_CACHE_WC (_PAGE_PWT)
+#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
+#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
+
 #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-- cgit v1.2.3

From b310f381d220b2c6e3fab16e8c6e4ca13eea75b2 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com"
Date: Tue, 18 Mar 2008 17:00:24 -0700
Subject: x86: PAT add ioremap_wc() interface

Introduce ioremap_wc() for write-combining (WC) remapping.
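
For context, a minimal usage sketch of the new interface; illustrative
only, not part of the patch. The example_map_fb() function, the device
and its BAR are made up; pci_resource_start()/pci_resource_len(),
writel() and iounmap() are the usual kernel helpers:

	#include <linux/pci.h>
	#include <linux/io.h>

	static int example_map_fb(struct pci_dev *pdev)
	{
		/* hypothetical prefetchable framebuffer in BAR 0 */
		unsigned long start = pci_resource_start(pdev, 0);
		unsigned long len = pci_resource_len(pdev, 0);
		void __iomem *fb;

		/*
		 * Write-combined mapping when PAT is enabled; per the
		 * implementation below, this transparently falls back
		 * to ioremap_nocache() when it is not.
		 */
		fb = ioremap_wc(start, len);
		if (!fb)
			return -ENOMEM;

		writel(0xffffffff, fb);	/* stores may now be combined */

		iounmap(fb);		/* must be freed with iounmap() */
		return 0;
	}
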
(generic wrapper is in a later patch) Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 30 ++++++++++++++++++++++++++++++ include/asm-x86/io.h | 3 +++ include/asm-x86/pgtable.h | 2 ++ 3 files changed, 35 insertions(+) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0cdb7f11ce49..51cd3956c564 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -97,6 +97,9 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, default: err = _set_memory_uc(vaddr, nrpages); break; + case _PAGE_CACHE_WC: + err = _set_memory_wc(vaddr, nrpages); + break; case _PAGE_CACHE_WB: err = _set_memory_wb(vaddr, nrpages); break; @@ -166,8 +169,13 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, * Do not fallback to certain memory types with certain * requested type: * - request is uncached, return cannot be write-back + * - request is uncached, return cannot be write-combine + * - request is write-combine, return cannot be write-back */ if ((prot_val == _PAGE_CACHE_UC && + (new_prot_val == _PAGE_CACHE_WB || + new_prot_val == _PAGE_CACHE_WC)) || + (prot_val == _PAGE_CACHE_WC && new_prot_val == _PAGE_CACHE_WB)) { free_memtype(phys_addr, phys_addr + size); return NULL; @@ -180,6 +188,9 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, default: prot = PAGE_KERNEL_NOCACHE; break; + case _PAGE_CACHE_WC: + prot = PAGE_KERNEL_WC; + break; case _PAGE_CACHE_WB: prot = PAGE_KERNEL; break; @@ -235,6 +246,25 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_nocache); +/** + * ioremap_wc - map memory into CPU space write combined + * @offset: bus address of the memory + * @size: size of the resource to map + * + * This version of ioremap ensures that the memory is marked write combining. + * Write combining allows faster writes to some hardware devices. + * + * Must be freed with iounmap. 
+ */ +void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) +{ + if (pat_wc_enabled) + return __ioremap(phys_addr, size, _PAGE_CACHE_WC); + else + return ioremap_nocache(phys_addr, size); +} +EXPORT_SYMBOL(ioremap_wc); + void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { return __ioremap(phys_addr, size, _PAGE_CACHE_WB); diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index 6fa150fa68fa..599cad3505c1 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -1,3 +1,5 @@ +#define ARCH_HAS_IOREMAP_WC + #ifdef CONFIG_X86_32 # include "io_32.h" #else @@ -5,4 +7,5 @@ #endif extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); +extern void __iomem * ioremap_wc(unsigned long offset, unsigned long size); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index ca6deb3de7c0..e814cfe96af2 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -90,6 +90,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) @@ -107,6 +108,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) +#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC) #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) #define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE) -- cgit v1.2.3 From 3cbaeafeb10e38bce6c8d4764a254260d5a564bd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 23 Mar 2008 01:03:12 -0700 Subject: include/asm-x86/pgtable.h: checkpatch cleanups - formatting only Signed-off-by: Joe Perches Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable.h | 156 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 36 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index e814cfe96af2..2ce765070464 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -48,12 +48,15 @@ #endif /* If _PAGE_PRESENT is clear, we use these: */ -#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, + * saved PTE; unset:swap */ #define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; pte_present gives true */ -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ + _PAGE_DIRTY) #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -64,14 +67,20 @@ #define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER 
| _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ + _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) #define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) #ifdef CONFIG_X86_32 #define _PAGE_KERNEL_EXEC \ @@ -142,7 +151,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC; * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) extern spinlock_t pgd_lock; @@ -152,30 +161,101 @@ extern struct list_head pgd_list; * The following only work if pte_present() is true. * Undefined behaviour if not.. */ -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } -static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } -static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; } -static inline int pte_global(pte_t pte) { return pte_val(pte) & _PAGE_GLOBAL; } -static inline int pte_exec(pte_t pte) { return !(pte_val(pte) & _PAGE_NX); } - -static inline int pmd_large(pmd_t pte) { - return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) == - (_PAGE_PSE|_PAGE_PRESENT); +static inline int pte_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_DIRTY; +} + +static inline int pte_young(pte_t pte) +{ + return pte_val(pte) & _PAGE_ACCESSED; +} + +static inline int pte_write(pte_t pte) +{ + return pte_val(pte) & _PAGE_RW; +} + +static inline int pte_file(pte_t pte) +{ + return pte_val(pte) & _PAGE_FILE; +} + +static inline int pte_huge(pte_t pte) +{ + return pte_val(pte) & _PAGE_PSE; } -static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); } -static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); } -static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); } -static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); } -static inline pte_t pte_mkdirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_DIRTY); } -static inline pte_t pte_mkyoung(pte_t pte) { return __pte(pte_val(pte) | _PAGE_ACCESSED); } -static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); } -static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_PSE); } -static inline pte_t pte_clrhuge(pte_t pte) { return 
__pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); } -static inline pte_t pte_mkglobal(pte_t pte) { return __pte(pte_val(pte) | _PAGE_GLOBAL); } -static inline pte_t pte_clrglobal(pte_t pte) { return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); } +static inline int pte_global(pte_t pte) +{ + return pte_val(pte) & _PAGE_GLOBAL; +} + +static inline int pte_exec(pte_t pte) +{ + return !(pte_val(pte) & _PAGE_NX); +} + +static inline int pmd_large(pmd_t pte) +{ + return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_PSE); +} + +static inline pte_t pte_clrhuge(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); +} + +static inline pte_t pte_mkglobal(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_GLOBAL); +} + +static inline pte_t pte_clrglobal(pte_t pte) +{ + return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); +} extern pteval_t __supported_pte_mask; @@ -342,7 +422,8 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, }) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); pte_update(mm, addr, ptep); @@ -350,7 +431,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + int full) { pte_t pte; if (full) { @@ -366,7 +449,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); pte_update(mm, addr, ptep); -- cgit v1.2.3 From 43cdf5d6e0a75c1069adc8d126b97b792ff53142 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 22 Mar 2008 18:50:22 +0100 Subject: x86: pgtable, document pde bits Some of pde bits weren't documented, add the short description to them. Signed-off-by: Jiri Slaby Cc: H. 
Peter Anvin
Signed-off-by: Ingo Molnar
---
 include/asm-x86/pgtable.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)
(limited to 'include/asm-x86/pgtable.h')

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 2ce765070464..f1d9f4a03f6f 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -4,13 +4,13 @@
 #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
 #define FIRST_USER_ADDRESS 0
-#define _PAGE_BIT_PRESENT 0
-#define _PAGE_BIT_RW 1
-#define _PAGE_BIT_USER 2
-#define _PAGE_BIT_PWT 3
-#define _PAGE_BIT_PCD 4
-#define _PAGE_BIT_ACCESSED 5
-#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_PRESENT 0 /* is present */
+#define _PAGE_BIT_RW 1 /* writeable */
+#define _PAGE_BIT_USER 2 /* userspace addressable */
+#define _PAGE_BIT_PWT 3 /* page write through */
+#define _PAGE_BIT_PCD 4 /* page cache disabled */
+#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
+#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
 #define _PAGE_BIT_FILE 6
 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
 #define _PAGE_BIT_PAT 7 /* on 4KB pages */
-- cgit v1.2.3

From f0970c13b6a5b01189aeb196ebb573cf87d95839 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com"
Date: Tue, 18 Mar 2008 17:00:20 -0700
Subject: x86: PAT phys_mem_access_prot_allowed for dev/mem mmap

Introduce phys_mem_access_prot_allowed(), which checks whether the
requested mapping is possible without any conflicts and returns success
or failure accordingly. phys_mem_access_prot() by itself does not allow
for a failure case. This ability to return an error is needed for PAT,
where we may have aliasing conflicts.

On x86, define __HAVE_PHYS_MEM_ACCESS_PROT and move the x86-specific
code out of /dev/mem into the arch-specific area.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Suresh Siddha
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pat.c | 39 +++++++++++++++++++++++++++++++++++++++
 drivers/char/mem.c | 41 +++++++++++------------------------------
 include/asm-x86/pgtable.h | 9 +++++++++
 3 files changed, 59 insertions(+), 30 deletions(-)
(limited to 'include/asm-x86/pgtable.h')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..64cc0c18233e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -419,3 +419,42 @@ int free_memtype(u64 start, u64 end)
 	return err;
 }
+
+/* /dev/mem interface. Use the previous mapping */
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t vma_prot)
+{
+	return vma_prot;
+}
+
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t *vma_prot)
+{
+
+	if (file->f_flags & O_SYNC) {
+		*vma_prot = pgprot_noncached(*vma_prot);
+		return 1;
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory,
+	 * so blindly setting UC or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+	if (!pat_wc_enabled && !
( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + *vma_prot = pgprot_noncached(*vma_prot); + return 1; + } +#endif + + return 1; +} diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 83495885ada0..56b2fb4fbc93 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -41,36 +41,7 @@ */ static inline int uncached_access(struct file *file, unsigned long addr) { -#if defined(__i386__) && !defined(__arch_um__) - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting PCD or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (file->f_flags & O_SYNC) - return 1; - return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) ) - && addr >= __pa(high_memory); -#elif defined(__x86_64__) && !defined(__arch_um__) - /* - * This is broken because it can generate memory type aliases, - * which can cause cache corruptions - * But it is only available for root and we have to be bug-to-bug - * compatible with i386. - */ - if (file->f_flags & O_SYNC) - return 1; - /* same behaviour as i386. PAT always set to cached and MTRRs control the - caching behaviour. - Hopefully a full PAT implementation will fix that soon. */ - return 0; -#elif defined(CONFIG_IA64) +#if defined(CONFIG_IA64) /* * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. 
*/ @@ -283,6 +254,12 @@ static ssize_t write_mem(struct file * file, const char __user * buf, return written; } +int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file, + unsigned long pfn, unsigned long size, pgprot_t *vma_prot) +{ + return 1; +} + #ifndef __HAVE_PHYS_MEM_ACCESS_PROT static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) @@ -336,6 +313,10 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) if (!range_is_allowed(vma->vm_pgoff, size)) return -EPERM; + if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, + &vma->vm_page_prot)) + return -EINVAL; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index f1d9f4a03f6f..1902f0aed6c1 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -289,6 +289,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#ifndef __ASSEMBLY__ +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot); +#endif + #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ -- cgit v1.2.3 From ee5aa8d3ba65d76157f22b7afedd089d8acfe524 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:03 -0700 Subject: x86/pgtable.h: demacro ptep_set_access_flags Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable.c | 16 ++++++++++++++++ include/asm-x86/pgtable.h | 13 +++---------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5accc08683c7..e7cda2057e1d 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -1,5 +1,6 @@ #include #include +#include #include pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) @@ -264,3 +265,18 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long)pgd); } #endif + +int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + int changed = !pte_same(*ptep, entry); + + if (changed && dirty) { + *ptep = entry; + pte_update_defer(vma->vm_mm, address, ptep); + flush_tlb_page(vma, address); + } + + return changed; +} diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index f1d9f4a03f6f..feddddc2d97b 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -389,16 +389,9 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, * bit at the same time. 
*/ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ -({ \ - int __changed = !pte_same(*(ptep), entry); \ - if (__changed && dirty) { \ - *ptep = entry; \ - pte_update_defer((vma)->vm_mm, (address), (ptep)); \ - flush_tlb_page(vma, address); \ - } \ - __changed; \ -}) +extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define ptep_test_and_clear_young(vma, addr, ptep) ({ \ -- cgit v1.2.3 From f9fbf1a36a6bb6a639459802bccee01185ee3220 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:04 -0700 Subject: x86/pgtable.h: demacro ptep_test_and_clear_young Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable.c | 15 +++++++++++++++ include/asm-x86/pgtable.h | 11 ++--------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e7cda2057e1d..54bd77a7eee0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -280,3 +280,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma, return changed; } + +int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + int ret = 0; + + if (pte_young(*ptep)) + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + &ptep->pte); + + if (ret) + pte_update(vma->vm_mm, addr, ptep); + + return ret; +} diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index feddddc2d97b..676408c98631 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -394,15 +394,8 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, pte_t entry, int dirty); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ - int __ret = 0; \ - if (pte_young(*(ptep))) \ - __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ - &(ptep)->pte); \ - if (__ret) \ - pte_update((vma)->vm_mm, addr, ptep); \ - __ret; \ -}) +extern int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(vma, address, ptep) \ -- cgit v1.2.3 From c20311e165eb94f5ef12b15e452cc6ec24bd7813 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:05 -0700 Subject: x86/pgtable.h: demacro ptep_clear_flush_young Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable.c | 12 ++++++++++++ include/asm-x86/pgtable.h | 10 ++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 54bd77a7eee0..af0c50161d95 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -295,3 +295,15 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } + +int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + int young; + + young = ptep_test_and_clear_young(vma, address, ptep); + if (young) + flush_tlb_page(vma, address); + + return young; +} diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 676408c98631..4ebea41ea70e 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -398,14 +398,8 @@ extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned 
long addr, pte_t *ptep); #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define ptep_clear_flush_young(vma, address, ptep) \ -({ \ - int __young; \ - __young = ptep_test_and_clear_young((vma), (address), (ptep)); \ - if (__young) \ - flush_tlb_page(vma, address); \ - __young; \ -}) +extern int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, -- cgit v1.2.3 From 68db065c845bd9d0eb96946ab104b4c82d0ae9da Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:13 -0700 Subject: x86: unify KERNEL_PGD_PTRS Make KERNEL_PGD_PTRS common, as previously it was only being defined for 32-bit. There are a couple of follow-on changes from this: - KERNEL_PGD_PTRS was being defined in terms of USER_PGD_PTRS. The definition of USER_PGD_PTRS doesn't really make much sense on x86-64, since it can have two different user address-space configurations. I renamed USER_PGD_PTRS to KERNEL_PGD_BOUNDARY, which is meaningful for all of 32/32, 32/64 and 64/64 process configurations. - USER_PTRS_PER_PGD was also defined and was being used for similar purposes. Converting its users to KERNEL_PGD_BOUNDARY left it completely unused, and so I removed it. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Zach Amsden Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 4 ++-- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/pgtable.c | 12 ++++++------ include/asm-x86/pgtable.h | 4 +++- include/asm-x86/pgtable_32.h | 3 --- 8 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 19c9386ac118..1791a751a772 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,6 @@ # include # include # include -# include #else # include #endif @@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length) /* Remap the kernel at virtual address zero, as well as offset zero from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6a925394bc7e..2de2f7a2ed5d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) #ifdef CONFIG_X86_32 /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); #endif diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 44f7ca153b71..956f38927aa7 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page) * pdes need to be zeroed. 
*/ if (type & VMI_PAGE_CLONE) - limit = USER_PTRS_PER_PGD; + limit = KERNEL_PGD_BOUNDARY; for (i = 0; i < limit; i++) BUG_ON(ptr[i]); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 96f60c7cd124..394046effd78 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -560,8 +560,8 @@ static void __init do_boot_cpu(__u8 cpu) hijack_source.idt.Offset, stack_start.sp)); /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); if (quad_boot) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index df490905f377..08aa1878fad4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -457,7 +457,7 @@ void zap_low_mappings(void) * Note that "pgd_clear()" doesn't do it for * us, because pgd_clear() is a no-op on i386. */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) { + for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { #ifdef CONFIG_X86_PAE set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); #else diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index af0c50161d95..e2ac320e6151 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -104,7 +104,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) * -- wli */ #define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) + (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) static void pgd_ctor(void *p) { @@ -112,7 +112,7 @@ static void pgd_ctor(void *p) unsigned long flags; /* Clear usermode parts of PGD */ - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t)); spin_lock_irqsave(&pgd_lock, flags); @@ -121,12 +121,12 @@ static void pgd_ctor(void *p) references from swapper_pg_dir. 
*/ if (PAGETABLE_LEVELS == 2 || (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { - clone_pgd_range(pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, + clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, __pa(swapper_pg_dir) >> PAGE_SHIFT, - USER_PTRS_PER_PGD, + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); } @@ -201,7 +201,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd) return 0; } - if (i >= USER_PTRS_PER_PGD) + if (i >= KERNEL_PGD_BOUNDARY) memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), sizeof(pmd_t) * PTRS_PER_PMD); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 4ebea41ea70e..e61075e70a54 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H -#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) #define FIRST_USER_ADDRESS 0 #define _PAGE_BIT_PRESENT 0 /* is present */ @@ -330,6 +329,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) # include "pgtable_64.h" #endif +#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) + #ifndef __ASSEMBLY__ enum { diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index c4a643674458..cc52da32fbe2 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -48,9 +48,6 @@ void paging_init(void); #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) - /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that -- cgit v1.2.3 From 85958b465c2e0de315575b1d3d7e7c2ce7126880 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:14 -0700 Subject: x86: unify pgd ctor/dtor All pagetables need fundamentally the same setup and destruction, so just use the same code for everything. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable.c | 59 ++++++++++---------------------------------- include/asm-x86/pgtable.h | 16 ++++++++++++ include/asm-x86/pgtable_32.h | 15 ----------- include/asm-x86/pgtable_64.h | 2 +- 4 files changed, 30 insertions(+), 62 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e2ac320e6151..50159764f694 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -59,50 +59,6 @@ static inline void pgd_list_del(pgd_t *pgd) list_del(&page->lru); } -#ifdef CONFIG_X86_64 -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - unsigned boundary; - pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - unsigned long flags; - if (!pgd) - return NULL; - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); - /* - * Copy kernel pointers in from init. - * Could keep a freelist or slab cache of those because the kernel - * part never changes. 
- */ - boundary = pgd_index(__PAGE_OFFSET); - memset(pgd, 0, boundary * sizeof(pgd_t)); - memcpy(pgd + boundary, - init_level4_pgt + boundary, - (PTRS_PER_PGD - boundary) * sizeof(pgd_t)); - return pgd; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - unsigned long flags; - BUG_ON((unsigned long)pgd & (PAGE_SIZE-1)); - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); - free_page((unsigned long)pgd); -} -#else -/* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. - * -- wli - */ #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) @@ -120,7 +76,8 @@ static void pgd_ctor(void *p) ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ if (PAGETABLE_LEVELS == 2 || - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || + PAGETABLE_LEVELS == 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); @@ -149,6 +106,17 @@ static void pgd_dtor(void *pgd) spin_unlock_irqrestore(&pgd_lock, flags); } +/* + * List of all pgd's needed for non-PAE so it can invalidate entries + * in both cached and uncached pgd's; not needed for PAE since the + * kernel pmd is shared. If PAE were not to share the pmd a similar + * tactic would be needed. This is essentially codepath-based locking + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. + * -- wli + */ + #ifdef CONFIG_X86_PAE /* * Mop up any pmd pages which may still be attached to the pgd. @@ -264,7 +232,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) pgd_dtor(pgd); free_page((unsigned long)pgd); } -#endif int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index e61075e70a54..b8a08bd7bd48 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -438,6 +438,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, pte_update(mm, addr, ptep); } +/* + * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); + * + * dst - pointer to pgd range anwhere on a pgd page + * src - "" + * count - the number of pgds to copy. + * + * dst and src can be on the same page, but the range must not overlap, + * and must not cross a page boundary. + */ +static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +{ + memcpy(dst, src, count * sizeof(pgd_t)); +} + + #include #endif /* __ASSEMBLY__ */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index cc52da32fbe2..168b6447cf18 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -105,21 +105,6 @@ extern int pmd_bad(pmd_t pmd); # include #endif -/* - * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); - * - * dst - pointer to pgd range anwhere on a pgd page - * src - "" - * count - the number of pgds to copy. 
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
-	memcpy(dst, src, count * sizeof(pgd_t));
-}
-
 /*
  * Macro to mark a page protection value as "uncacheable".
  * On processors which do not support it, this is a no-op.
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 9fd87d0b6477..a3bbf8766c1d 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -24,7 +24,7 @@ extern void paging_init(void);
 #endif /* !__ASSEMBLY__ */
-#define SHARED_KERNEL_PMD 1
+#define SHARED_KERNEL_PMD 0
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
-- cgit v1.2.3

From 7e675137a8e1a4d45822746456dd389b65745bf6 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Mon, 28 Apr 2008 02:13:00 -0700
Subject: mm: introduce pte_special pte bit

s390, for one, cannot implement VM_MIXEDMAP with pfn_valid, due to its
memory model (which is more dynamic than most). Instead, they had
proposed to implement it with an additional path through
vm_normal_page(), using a bit in the pte to determine whether or not
the page should be refcounted:

	vm_normal_page()
	{
		...
		if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
			if (vma->vm_flags & VM_MIXEDMAP) {
#ifdef s390
				if (!mixedmap_refcount_pte(pte))
					return NULL;
#else
				if (!pfn_valid(pfn))
					return NULL;
#endif
				goto out;
			}
		...
	}

This is fine. However, if we are allowed to use a bit in the pte to
determine refcountedness, we can use that to _completely_ replace all
the vma-based schemes. So instead of adding more cases to the already
complex vma-based scheme, we can have a clearly separate and simple
pte-based scheme (and get slightly better code generation in the
process):

	vm_normal_page()
	{
#ifdef s390
		if (!mixedmap_refcount_pte(pte))
			return NULL;
		return pte_page(pte);
#else
		...
#endif
	}

And finally, we may rather make this concept usable by any architecture
rather than making it s390-only, so implement a new type of pte state
for this. Unfortunately the old vma-based code must stay, because some
architectures may not be able to spare pte bits. This makes
vm_normal_page a little bit uglier than we would like, but the two
cases are clearly separate.

So introduce a pte_special pte state, and use it in mm/memory.c. It is
currently a no-op for all architectures, so this doesn't actually
result in any compiled code changes to mm/memory.o.

BTW: I haven't put vm_normal_page() into arch code, as per an earlier
suggestion. The reason is that, regardless of where vm_normal_page is
actually implemented, the *abstraction* is still exactly the same.
Also, while it depends on whether the architecture has pte_special or
not, those are the only two possible cases, and it really isn't an
arch-specific function -- the role of the arch code should be to
provide primitive functions and accessors with which to build the core
code; pte_special does that. We do not want architectures to know or
care about vm_normal_page itself, and we definitely don't want them
being able to invent something new there out of sight of mm/ code. If
we made vm_normal_page an arch function, then we would have to make
vm_insert_mixed (next patch) an arch function too. So I don't think
moving it to arch code fundamentally improves any abstractions, while
it does practically make the code more difficult to follow, for both mm
and arch developers, and easier to misuse.
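
For illustration only: on an architecture with a spare software bit in
the pte, the new accessor pair could be implemented roughly as below.
The _PAGE_SPECIAL bit and its value are hypothetical -- in this patch
every architecture still carries stub versions that return 0 and pass
the pte through unchanged:

	#define _PAGE_SPECIAL 0x800	/* hypothetical spare software bit */

	static inline int pte_special(pte_t pte)
	{
		return pte_val(pte) & _PAGE_SPECIAL;
	}

	static inline pte_t pte_mkspecial(pte_t pte)
	{
		return __pte(pte_val(pte) | _PAGE_SPECIAL);
	}

With such a bit, vm_normal_page() can test the pte directly instead of
dissecting vma->vm_flags, as sketched above.
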
[akpm@linux-foundation.org: build fix] Signed-off-by: Nick Piggin Acked-by: Carsten Otte Cc: Jared Hulbert Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/pgtable.h | 2 + include/asm-arm/pgtable.h | 3 ++ include/asm-avr32/pgtable.h | 8 +++ include/asm-cris/pgtable.h | 2 + include/asm-frv/pgtable.h | 2 + include/asm-ia64/pgtable.h | 3 ++ include/asm-m32r/pgtable.h | 10 ++++ include/asm-m68k/motorola_pgtable.h | 2 + include/asm-m68k/sun3_pgtable.h | 2 + include/asm-mips/pgtable.h | 2 + include/asm-mn10300/pgtable.h | 3 ++ include/asm-parisc/pgtable.h | 2 + include/asm-powerpc/pgtable-ppc32.h | 3 ++ include/asm-powerpc/pgtable-ppc64.h | 3 ++ include/asm-ppc/pgtable.h | 3 ++ include/asm-s390/pgtable.h | 10 ++++ include/asm-sh/pgtable_32.h | 3 ++ include/asm-sh/pgtable_64.h | 10 ++-- include/asm-sparc/pgtable.h | 7 +++ include/asm-sparc64/pgtable.h | 10 ++++ include/asm-um/pgtable.h | 10 ++++ include/asm-x86/pgtable.h | 10 ++++ include/asm-xtensa/pgtable.h | 4 ++ include/linux/mm.h | 4 +- mm/memory.c | 99 ++++++++++++++++++++----------------- 25 files changed, 168 insertions(+), 49 deletions(-) (limited to 'include/asm-x86/pgtable.h') diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 99037b032357..05ce5fba43e3 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -268,6 +268,7 @@ extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } extern inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -275,6 +276,7 @@ extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } +extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index 5e0182485d8c..5571c13c3f3b 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -260,6 +260,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (pte_val(pte) & L_PTE_WRITE) #define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_special(pte) (0) /* * The following only works if pte_present() is not true. @@ -280,6 +281,8 @@ PTE_BIT_FUNC(mkdirty, |= L_PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~L_PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= L_PTE_YOUNG); +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + /* * Mark the prot value as uncacheable and unbufferable. 
*/ diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h index 3ae7b548fce7..c0e5e29417df 100644 --- a/include/asm-avr32/pgtable.h +++ b/include/asm-avr32/pgtable.h @@ -212,6 +212,10 @@ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_special(pte_t pte) +{ + return 0; +} /* * The following only work if pte_present() is not true. @@ -252,6 +256,10 @@ static inline pte_t pte_mkyoung(pte_t pte) set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) diff --git a/include/asm-cris/pgtable.h b/include/asm-cris/pgtable.h index a2607575681b..4c373624ee97 100644 --- a/include/asm-cris/pgtable.h +++ b/include/asm-cris/pgtable.h @@ -115,6 +115,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WR static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -162,6 +163,7 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Conversion functions: convert a page and protection to a page entry, diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 4e219046fe42..83c51aba534b 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -380,6 +380,7 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address) static inline int pte_dirty(pte_t pte) { return (pte).pte & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return !((pte).pte & _PAGE_WP); } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { (pte).pte &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { (pte).pte &= ~_PAGE_ACCESSED; return pte; } @@ -387,6 +388,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte |= _PAGE_WP; return pte static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte &= ~_PAGE_WP; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index ed70862ea247..7a9bff47564f 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -302,6 +302,8 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) #define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) +#define pte_special(pte) 0 + /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the * access rights: @@ -313,6 +315,7 @@ ia64_phys_addr_valid (unsigned long addr) #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) +#define pte_mkspecial(pte) (pte) /* * 
Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 86505387be08..e6359c566b50 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -214,6 +214,11 @@ static inline int pte_file(pte_t pte) return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) +{ + return 0; +} + static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; @@ -250,6 +255,11 @@ static inline pte_t pte_mkwrite(pte_t pte) return pte; } +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); diff --git a/include/asm-m68k/motorola_pgtable.h b/include/asm-m68k/motorola_pgtable.h index 13135d4821d8..8e9a8a754dde 100644 --- a/include/asm-m68k/motorola_pgtable.h +++ b/include/asm-m68k/motorola_pgtable.h @@ -168,6 +168,7 @@ static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -185,6 +186,7 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/include/asm-m68k/sun3_pgtable.h b/include/asm-m68k/sun3_pgtable.h index b766fc261bde..f847ec732d62 100644 --- a/include/asm-m68k/sun3_pgtable.h +++ b/include/asm-m68k/sun3_pgtable.h @@ -169,6 +169,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEA static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } +static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -181,6 +182,7 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index 17a7703a2969..782221e57c0a 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -285,6 +285,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } #endif +static inline int pte_special(pte_t pte) { return 0; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Macro to make mark a page protection value as "uncacheable". 
diff --git a/include/asm-mn10300/pgtable.h b/include/asm-mn10300/pgtable.h
index 375c4941deda..6dc30fc827c4 100644
--- a/include/asm-mn10300/pgtable.h
+++ b/include/asm-mn10300/pgtable.h
@@ -224,6 +224,7 @@ static inline int pte_read(pte_t pte) { return pte_val(pte) & __PAGE_PROT_USER;
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte) { return pte_val(pte) & __PAGE_PROT_WRITE; }
+static inline int pte_special(pte_t pte){ return 0; }
 
 /*
  * The following only works if pte_present() is not true.
@@ -265,6 +266,8 @@ static inline pte_t pte_mkwrite(pte_t pte)
        return pte;
 }
 
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+
 #define pte_ERROR(e) \
        printk(KERN_ERR "%s:%d: bad pte %08lx.\n", \
               __FILE__, __LINE__, pte_val(e))
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index dc86adbec916..470a4b88124d 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -323,6 +323,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
 
 static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
@@ -330,6 +331,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret
 static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/include/asm-powerpc/pgtable-ppc32.h b/include/asm-powerpc/pgtable-ppc32.h
index daea7692d070..7c97b5a08d08 100644
--- a/include/asm-powerpc/pgtable-ppc32.h
+++ b/include/asm-powerpc/pgtable-ppc32.h
@@ -504,6 +504,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
 
 static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -521,6 +522,8 @@ static inline pte_t pte_mkdirty(pte_t pte) {
        pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte) {
        pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) {
+       return pte; }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index dd4c26dc57d2..27f18695f7d6 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -239,6 +239,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW;}
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
 static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
+static inline int pte_special(pte_t pte) { return 0; }
 
 static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte)   { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -257,6 +258,8 @@ static inline pte_t pte_mkyoung(pte_t pte) {
        pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte) {
        return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) {
+       return pte; }
 
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
index 70435d32129a..55f9d38e3bf8 100644
--- a/include/asm-ppc/pgtable.h
+++ b/include/asm-ppc/pgtable.h
@@ -483,6 +483,7 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
 
 static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -500,6 +501,8 @@ static inline pte_t pte_mkdirty(pte_t pte) {
        pte_val(pte) |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte) {
        pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) {
+       return pte; }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 4c0698c0dda5..76e8a7904e8a 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -518,6 +518,11 @@ static inline int pte_file(pte_t pte)
        return (pte_val(pte) & mask) == _PAGE_TYPE_FILE;
 }
 
+static inline int pte_special(pte_t pte)
+{
+       return 0;
+}
+
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(a,b)  (pte_val(a) == pte_val(b))
 
@@ -715,6 +720,11 @@ static inline pte_t pte_mkyoung(pte_t pte)
        return pte;
 }
 
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       return pte;
+}
+
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long addr, pte_t *ptep)
diff --git a/include/asm-sh/pgtable_32.h b/include/asm-sh/pgtable_32.h
index 3e3557c53c55..cbc731d35c25 100644
--- a/include/asm-sh/pgtable_32.h
+++ b/include/asm-sh/pgtable_32.h
@@ -326,6 +326,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 #define pte_dirty(pte)         ((pte).pte_low & _PAGE_DIRTY)
 #define pte_young(pte)         ((pte).pte_low & _PAGE_ACCESSED)
 #define pte_file(pte)          ((pte).pte_low & _PAGE_FILE)
+#define pte_special(pte)       (0)
 
 #ifdef CONFIG_X2TLB
 #define pte_write(pte)         ((pte).pte_high & _PAGE_EXT_USER_WRITE)
@@ -356,6 +357,8 @@ PTE_BIT_FUNC(low, mkdirty, |= _PAGE_DIRTY);
 PTE_BIT_FUNC(low, mkold, &= ~_PAGE_ACCESSED);
 PTE_BIT_FUNC(low, mkyoung, |= _PAGE_ACCESSED);
 
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+
 /*
  * Macro and implementation to make a page protection as uncachable.
  */
diff --git a/include/asm-sh/pgtable_64.h b/include/asm-sh/pgtable_64.h
index f9dd9d311441..c78990cda557 100644
--- a/include/asm-sh/pgtable_64.h
+++ b/include/asm-sh/pgtable_64.h
@@ -254,10 +254,11 @@ extern void __handle_bad_pmd_kernel(pmd_t * pmd);
 /*
  * The following have defined behavior only work if pte_present() is true.
  */
-static inline int pte_dirty(pte_t pte){ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte){ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_write(pte_t pte){ return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte)  { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_special(pte_t pte){ return 0; }
 
 static inline pte_t pte_wrprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_WRITE)); return pte; }
 static inline pte_t pte_mkclean(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; }
@@ -266,6 +267,7 @@ static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) |
 static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
 static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
 static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_SZHUGE)); return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
 /*
diff --git a/include/asm-sparc/pgtable.h b/include/asm-sparc/pgtable.h
index 2cc235b74d94..d84af6d95f5c 100644
--- a/include/asm-sparc/pgtable.h
+++ b/include/asm-sparc/pgtable.h
@@ -219,6 +219,11 @@ static inline int pte_file(pte_t pte)
        return pte_val(pte) & BTFIXUP_HALF(pte_filei);
 }
 
+static inline int pte_special(pte_t pte)
+{
+       return 0;
+}
+
 /*
  */
 BTFIXUPDEF_HALF(pte_wrprotecti)
@@ -251,6 +256,8 @@ BTFIXUPDEF_CALL_CONST(pte_t, pte_mkyoung, pte_t)
 #define pte_mkdirty(pte) BTFIXUP_CALL(pte_mkdirty)(pte)
 #define pte_mkyoung(pte) BTFIXUP_CALL(pte_mkyoung)(pte)
 
+#define pte_mkspecial(pte)    (pte)
+
 #define pfn_pte(pfn, prot)     mk_pte(pfn_to_page(pfn), prot)
 
 BTFIXUPDEF_CALL(unsigned long,  pte_pfn, pte_t)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index 549e45266b68..0e200e7acec7 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -506,6 +506,11 @@ static inline pte_t pte_mkyoung(pte_t pte)
        return __pte(pte_val(pte) | mask);
 }
 
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       return pte;
+}
+
 static inline unsigned long pte_young(pte_t pte)
 {
        unsigned long mask;
@@ -608,6 +613,11 @@ static inline unsigned long pte_present(pte_t pte)
        return val;
 }
 
+static inline int pte_special(pte_t pte)
+{
+       return 0;
+}
+
 #define pmd_set(pmdp, ptep)    \
        (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL))
 #define pud_set(pudp, pmdp)    \
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index 4102b443e925..02db81b7b86e 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -173,6 +173,11 @@ static inline int pte_newprot(pte_t pte)
        return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT)));
 }
 
+static inline int pte_special(pte_t pte)
+{
+       return 0;
+}
+
 /*
  * =================================
  * Flags setting section.
@@ -241,6 +246,11 @@ static inline pte_t pte_mknewpage(pte_t pte)
        return(pte);
 }
 
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       return(pte);
+}
+
 static inline void set_pte(pte_t *pteptr, pte_t pteval)
 {
        pte_copy(*pteptr, pteval);
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index a496d6335d3b..801b31f71452 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -195,6 +195,11 @@ static inline int pte_exec(pte_t pte)
        return !(pte_val(pte) & _PAGE_NX);
 }
 
+static inline int pte_special(pte_t pte)
+{
+       return 0;
+}
+
 static inline int pmd_large(pmd_t pte)
 {
        return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -256,6 +261,11 @@ static inline pte_t pte_clrglobal(pte_t pte)
        return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL);
 }
 
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+       return pte;
+}
+
 extern pteval_t __supported_pte_mask;
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h
index c8b024a48b4d..8014d96b21f1 100644
--- a/include/asm-xtensa/pgtable.h
+++ b/include/asm-xtensa/pgtable.h
@@ -210,6 +210,8 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_file(pte_t pte)  { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
+
 static inline pte_t pte_wrprotect(pte_t pte)
        { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; }
 static inline pte_t pte_mkclean(pte_t pte)
@@ -222,6 +224,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
        { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)
        { pte_val(pte) |= _PAGE_WRITABLE; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte)
+       { return pte; }
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c657ea0bd6aa..ba86ddaa2bb8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -721,7 +721,9 @@ struct zap_details {
        unsigned long truncate_count;           /* Compare vm_truncate_count */
 };
 
-struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+               pte_t pte);
+
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *);
 unsigned long unmap_vmas(struct mmu_gather **tlb,
diff --git a/mm/memory.c b/mm/memory.c
index 0da414c383e7..c5e88bcd8ec3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -371,33 +371,37 @@ static inline int is_cow_mapping(unsigned int flags)
 }
 
 /*
- * This function gets the "struct page" associated with a pte or returns
- * NULL if no "struct page" is associated with the pte.
+ * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *
- * A raw VM_PFNMAP mapping (ie. one that is not COWed) may not have any "struct
- * page" backing, and even if they do, they are not refcounted. COWed pages of
- * a VM_PFNMAP do always have a struct page, and they are normally refcounted
- * (they are _normal_ pages).
+ * "Special" mappings do not wish to be associated with a "struct page" (either
+ * it doesn't exist, or it exists but they don't want to touch it). In this
+ * case, NULL is returned here. "Normal" mappings do have a struct page.
  *
- * So a raw PFNMAP mapping will have each page table entry just pointing
- * to a page frame number, and as far as the VM layer is concerned, those do
- * not have pages associated with them - even if the PFN might point to memory
- * that otherwise is perfectly fine and has a "struct page".
+ * There are 2 broad cases. Firstly, an architecture may define a pte_special()
+ * pte bit, in which case this function is trivial. Secondly, an architecture
+ * may not have a spare pte bit, which requires a more complicated scheme,
+ * described below.
+ *
+ * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
+ * special mapping (even if there are underlying and valid "struct pages").
+ * COWed pages of a VM_PFNMAP are always normal.
  *
  * The way we recognize COWed pages within VM_PFNMAP mappings is through the
  * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit
- * set, and the vm_pgoff will point to the first PFN mapped: thus every
- * page that is a raw mapping will always honor the rule
+ * set, and the vm_pgoff will point to the first PFN mapped: thus every special
+ * mapping will always honor the rule
  *
  *     pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
  *
- * A call to vm_normal_page() will return NULL for such a page.
+ * And for normal mappings this is false.
+ *
+ * This restricts such mappings to be a linear translation from virtual address
+ * to pfn. To get around this restriction, we allow arbitrary mappings so long
+ * as the vma is not a COW mapping; in that case, we know that all ptes are
+ * special (because none can have been COWed).
  *
- * If the page doesn't follow the "remap_pfn_range()" rule in a VM_PFNMAP
- * then the page has been COW'ed. A COW'ed page _does_ have a "struct page"
- * associated with it even if it is in a VM_PFNMAP range. Calling
- * vm_normal_page() on such a page will therefore return the "struct page".
  *
+ * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
  *
  * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
  * page" backing, however the difference is that _all_ pages with a struct
@@ -407,16 +411,29 @@ static inline int is_cow_mapping(unsigned int flags)
  * advantage is that we don't have to follow the strict linearity rule of
  * PFNMAP mappings in order to support COWable mappings.
  *
- * A call to vm_normal_page() with a VM_MIXEDMAP mapping will return the
- * associated "struct page" or NULL for memory not backed by a "struct page".
- *
- *
- * All other mappings should have a valid struct page, which will be
- * returned by a call to vm_normal_page().
  */
-struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+# define HAVE_PTE_SPECIAL 1
+#else
+# define HAVE_PTE_SPECIAL 0
+#endif
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+                               pte_t pte)
 {
-       unsigned long pfn = pte_pfn(pte);
+       unsigned long pfn;
+
+       if (HAVE_PTE_SPECIAL) {
+               if (likely(!pte_special(pte))) {
+                       VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+                       return pte_page(pte);
+               }
+               VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+               return NULL;
+       }
+
+       /* !HAVE_PTE_SPECIAL case follows: */
+
+       pfn = pte_pfn(pte);
 
        if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
                if (vma->vm_flags & VM_MIXEDMAP) {
@@ -424,7 +441,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
                                return NULL;
                        goto out;
                } else {
-                       unsigned long off = (addr-vma->vm_start) >> PAGE_SHIFT;
+                       unsigned long off;
+                       off = (addr - vma->vm_start) >> PAGE_SHIFT;
                        if (pfn == vma->vm_pgoff + off)
                                return NULL;
                        if (!is_cow_mapping(vma->vm_flags))
@@ -432,25 +450,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
                }
        }
 
-#ifdef CONFIG_DEBUG_VM
-       /*
-        * Add some anal sanity checks for now. Eventually,
-        * we should just do "return pfn_to_page(pfn)", but
-        * in the meantime we check that we get a valid pfn,
-        * and that the resulting page looks ok.
-        */
-       if (unlikely(!pfn_valid(pfn))) {
-               print_bad_pte(vma, pte, addr);
-               return NULL;
-       }
-#endif
+       VM_BUG_ON(!pfn_valid(pfn));
 
        /*
-        * NOTE! We still have PageReserved() pages in the page
-        * tables.
+        * NOTE! We still have PageReserved() pages in the page tables.
         *
-        * The PAGE_ZERO() pages and various VDSO mappings can
-        * cause them to exist.
+        * eg. VDSO mappings can cause them to exist.
         */
 out:
        return pfn_to_page(pfn);
@@ -1263,6 +1268,12 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
        pte_t *pte, entry;
        spinlock_t *ptl;
 
+       /*
+        * Technically, architectures with pte_special can avoid all these
+        * restrictions (same for remap_pfn_range). However we would like
+        * consistency in testing and feature parity among all, so we should
+        * try to keep these invariants in place for everybody.
+        */
        BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
        BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
                                                (VM_PFNMAP|VM_MIXEDMAP));
@@ -1278,7 +1289,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                goto out_unlock;
 
        /* Ok, finally just insert the thing.. */
-       entry = pfn_pte(pfn, vma->vm_page_prot);
+       entry = pte_mkspecial(pfn_pte(pfn, vma->vm_page_prot));
        set_pte_at(mm, addr, pte, entry);
        update_mmu_cache(vma, addr, entry);
 
@@ -1309,7 +1320,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
        arch_enter_lazy_mmu_mode();
        do {
                BUG_ON(!pte_none(*pte));
-               set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+               set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
-- cgit v1.2.3
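A side note on the HAVE_PTE_SPECIAL test in the new vm_normal_page(): the #ifdef is folded into a 0/1 constant and checked with a plain if rather than wrapping the code in preprocessor conditionals. Both branches are always compiled, so neither can bit-rot unseen on architectures that never exercise it, while the optimizer still discards the dead one. The same idiom as a standalone toy program:

#include <stdio.h>

/* #define __HAVE_ARCH_PTE_SPECIAL */  /* flip to choose the path */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif

int main(void)
{
        if (HAVE_PTE_SPECIAL)           /* constant-folded at compile time */
                printf("trivial pte_special() path\n");
        else
                printf("pfnmap/mixedmap fallback path\n");
        return 0;
}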
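The remap_pfn_range() rule quoted in the new comment boils down to a single comparison. The helper below does not exist in the kernel; it merely restates the arithmetic the !HAVE_PTE_SPECIAL path uses to tell a COWed (normal, refcounted) page from a raw (special) pfn inside a COW VM_PFNMAP vma:

/* Illustrative only: the vm_pgoff linearity check from the comment above. */
static inline int pfnmap_pfn_is_cowed(struct vm_area_struct *vma,
                                      unsigned long addr, unsigned long pfn)
{
        unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;

        /* on the linear map => raw pfn; off it => COWed, refcounted page */
        return pfn != vma->vm_pgoff + off;
}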
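Every pte_special()/pte_mkspecial() added above is a stub: no architecture sets the bit yet, so vm_normal_page() still takes the pfnmap/mixedmap path everywhere. For illustration only -- _PAGE_SPECIAL and its value are assumptions, not something this patch defines for any architecture -- a port with a spare software bit in its ptes could wire the interface up for real roughly as follows, and then define __HAVE_ARCH_PTE_SPECIAL so vm_normal_page() takes the trivial path:

/* Hypothetical sketch: assumes a free software pte bit named _PAGE_SPECIAL. */
#define _PAGE_SPECIAL  0x200           /* assumed spare sw bit */
#define __HAVE_ARCH_PTE_SPECIAL

static inline int pte_special(pte_t pte)
{
        return pte_val(pte) & _PAGE_SPECIAL;
}

static inline pte_t pte_mkspecial(pte_t pte)
{
        /* assumes pte_val() yields an lvalue, as on several arches above */
        pte_val(pte) |= _PAGE_SPECIAL;
        return pte;
}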
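Nothing changes for callers: both converted set_pte_at() sites mark the pte special internally. A minimal usage sketch -- mydrv_mmap() and mydev_phys are invented names, not part of this patch -- is the usual remap_pfn_range() call; on an architecture with a real pte bit, every pte installed below now comes back special, so vm_normal_page() returns NULL for it and the VM never tries to refcount the underlying frames:

/* Usage sketch with hypothetical names (mydrv_mmap, mydev_phys). */
static unsigned long mydev_phys;       /* hypothetical device base address */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
        /*
         * remap_pfn_range() sets VM_PFNMAP itself, and vm_pgoff for COW
         * mappings, which is what the linearity rule above relies on.
         */
        return remap_pfn_range(vma, vma->vm_start,
                               mydev_phys >> PAGE_SHIFT,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}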