From 48e239572271cf79412d90753a721242a765f7d9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 24 May 2008 17:24:34 +0200
Subject: x86: fixup the fallout of the bitops changes

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 50159764f694..ee1d6d39edd4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,

 	if (pte_young(*ptep))
 		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
-					 &ptep->pte);
+					 (unsigned long *) &ptep->pte);

 	if (ret)
 		pte_update(vma->vm_mm, addr, ptep);
--
cgit v1.2.3

From 7c7e6e07e2a7c0d2d96389f4f0540e44a80ecdaa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:41:54 -0700
Subject: x86: unify __set_fixmap

In both cases, I went with the 32-bit behaviour.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c    | 12 ------------
 arch/x86/mm/pgtable.c    | 14 ++++++++++++++
 arch/x86/mm/pgtable_32.c | 14 +-------------
 include/asm-x86/fixmap.h |  2 ++
 4 files changed, 17 insertions(+), 25 deletions(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 156e6d7b0e32..e5f531949857 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -213,18 +213,6 @@ void __init cleanup_highmap(void)
 	}
 }

-/* NOTE: this is meant to be run only at boot */
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-{
-	unsigned long address = __fix_to_virt(idx);
-
-	if (idx >= __end_of_fixed_addresses) {
-		printk(KERN_ERR "Invalid __set_fixmap\n");
-		return;
-	}
-	set_pte_phys(address, phys, prot);
-}
-
 static unsigned long __initdata table_start;
 static unsigned long __meminitdata table_end;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 50159764f694..3ebebe480b53 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -274,3 +274,17 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,

 	return young;
 }
+
+int fixmaps_set;
+
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	fixmaps_set++;
+}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 369cf065b6a4..3f97c3c87288 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -145,18 +145,6 @@ static int fixmaps;
 unsigned long __FIXADDR_TOP = 0xfffff000;
 EXPORT_SYMBOL(__FIXADDR_TOP);

-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
-{
-	unsigned long address = __fix_to_virt(idx);
-
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
-	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-	fixmaps++;
-}
-
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
@@ -166,7 +154,7 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
  */
 void reserve_top_address(unsigned long reserve)
 {
-	BUG_ON(fixmaps > 0);
+	BUG_ON(fixmaps_set > 0);
 	printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
 	       (int)-reserve);
 	__FIXADDR_TOP = -reserve - PAGE_SIZE;
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 01b0f98f9929..934d6b49b530 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -7,6 +7,8 @@
 # include "fixmap_64.h"
 #endif

+extern int fixmaps_set;
+
 extern void __set_fixmap(enum fixed_addresses idx,
 			 unsigned long phys, pgprot_t flags);

 #define set_fixmap(idx, phys) \
--
cgit v1.2.3

From d494a96125c99f1e37b1f831b29b42c9b712ee05 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:41:59 -0700
Subject: x86: implement set_pte_vaddr

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c     | 9 ++++-----
 arch/x86/mm/pgtable.c     | 2 +-
 arch/x86/mm/pgtable_32.c  | 6 +++---
 include/asm-x86/pgtable.h | 3 +++
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5f531949857..74fae8335128 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,15 +135,15 @@ static __init void *spp_getpage(void)
 	return ptr;
 }

-static void
-set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
+void
+set_pte_vaddr(unsigned long vaddr, pte_t new_pte)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, new_pte;
+	pte_t *pte;

-	pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
+	pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(new_pte));

 	pgd = pgd_offset_k(vaddr);
 	if (pgd_none(*pgd)) {
@@ -170,7 +170,6 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 			return;
 		}
 	}
-	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

 	pte = pte_offset_kernel(pmd, vaddr);
 	if (!pte_none(*pte) && pte_val(new_pte) &&
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3ebebe480b53..7498124e30fc 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -285,6 +285,6 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 		BUG();
 		return;
 	}
-	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+	set_pte_vaddr(address, pfn_pte(phys >> PAGE_SHIFT, flags));
 	fixmaps_set++;
 }
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 3f97c3c87288..0662f345212f 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -71,7 +71,7 @@ void show_mem(void)
  * Associate a virtual page frame with a given physical page frame
  * and protection flags for that frame.
  */
-static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -94,8 +94,8 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (pgprot_val(flags))
-		set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
+	if (pte_val(pteval))
+		set_pte_present(&init_mm, vaddr, pte, pteval);
 	else
 		pte_clear(&init_mm, vaddr, pte);

diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910b..702f2699c6df 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -318,6 +318,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t *vma_prot);
 #endif

+/* Install a pte for a particular vaddr in kernel space. */
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
--
cgit v1.2.3

From aeaaa59c7e15dcfaaf57ce069ef81683067d575d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 17 Jun 2008 11:42:01 -0700
Subject: x86/paravirt/xen: add set_fixmap pv_mmu_ops

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c |  2 ++
 arch/x86/mm/pgtable.c      |  9 +++++++--
 arch/x86/xen/enlighten.c   | 29 +++++++++++++++++++++++++++++
 include/asm-x86/fixmap.h   | 14 ++++++++++++--
 include/asm-x86/paravirt.h | 13 +++++++++++++
 5 files changed, 63 insertions(+), 4 deletions(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a03..cf06670349dc 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -416,6 +416,8 @@ struct pv_mmu_ops pv_mmu_ops = {
 		.enter = paravirt_nop,
 		.leave = paravirt_nop,
 	},
+
+	.set_fixmap = native_set_fixmap,
 };

 EXPORT_SYMBOL_GPL(pv_time_ops);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7498124e30fc..e9fb66361fc8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -277,7 +277,7 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,

 int fixmaps_set;

-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 {
 	unsigned long address = __fix_to_virt(idx);

@@ -285,6 +285,11 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 		BUG();
 		return;
 	}
-	set_pte_vaddr(address, pfn_pte(phys >> PAGE_SHIFT, flags));
+	set_pte_vaddr(address, pte);
 	fixmaps_set++;
 }
+
+void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
+}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d61..0ad8a64a2e05 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -960,6 +960,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }

+static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+{
+	pte_t pte;
+
+	phys >>= PAGE_SHIFT;
+
+	switch (idx) {
+	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
+#ifdef CONFIG_X86_F00F_BUG
+	case FIX_F00F_IDT:
+#endif
+	case FIX_WP_TEST:
+	case FIX_VDSO:
+#ifdef CONFIG_X86_LOCAL_APIC
+	case FIX_APIC_BASE:	/* maps dummy local APIC */
+#endif
+		pte = pfn_pte(phys, prot);
+		break;
+
+	default:
+		pte = mfn_pte(phys, prot);
+		break;
+	}
+
+	__native_set_fixmap(idx, pte);
+}
+
 static const struct pv_info xen_info __initdata = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
@@ -1112,6 +1139,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 		.enter = paravirt_enter_lazy_mmu,
 		.leave = xen_leave_lazy,
 	},
+
+	.set_fixmap = xen_set_fixmap,
 };

 #ifdef CONFIG_SMP
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 934d6b49b530..44d4f8217349 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -9,8 +9,18 @@

 extern int fixmaps_set;

-extern void __set_fixmap(enum fixed_addresses idx,
-			 unsigned long phys, pgprot_t flags);
+void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
+void native_set_fixmap(enum fixed_addresses idx,
+		       unsigned long phys, pgprot_t flags);
+
+#ifndef CONFIG_PARAVIRT
+static inline void __set_fixmap(enum fixed_addresses idx,
+				unsigned long phys, pgprot_t flags)
+{
+	native_set_fixmap(idx, phys, flags);
+}
+#endif
+
 #define set_fixmap(idx, phys) \
 	__set_fixmap(idx, phys, PAGE_KERNEL)
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0f13b945e240..af85caf9a799 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -273,6 +273,13 @@ struct pv_mmu_ops {
 #endif

 	struct pv_lazy_ops lazy_mode;
+
+	/* dom0 ops */
+
+	/* Sometimes the physical address is a pfn, and sometimes its
+	   an mfn.  We can tell which is which from the index. */
+	void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
+			   unsigned long phys, pgprot_t flags);
 };

 /* This contains all the paravirt structures: we get a convenient
@@ -1252,6 +1259,12 @@ static inline void arch_flush_lazy_mmu_mode(void)
 	}
 }

+static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
+				unsigned long phys, pgprot_t flags)
+{
+	pv_mmu_ops.set_fixmap(idx, phys, flags);
+}
+
 void _paravirt_nop(void);
 #define paravirt_nop	((void *)_paravirt_nop)
--
cgit v1.2.3

From a1d5a8691f1b6c92491747bea3b778b184fa5837 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 20 Jun 2008 15:34:46 +0200
Subject: x86: unify __set_fixmap, fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix build failure:

arch/x86/mm/pgtable.c:280: warning: ‘enum fixed_addresses’ declared inside parameter list
arch/x86/mm/pgtable.c:280: warning: its scope is only this definition or declaration, which is probably not what you want
arch/x86/mm/pgtable.c:280: error: parameter 1 (‘idx’) has incomplete type

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pgtable.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e9fb66361fc8..892fd1892b8d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -2,6 +2,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
+#include <asm/fixmap.h>

 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
--
cgit v1.2.3
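With the unification and its build fix in place, every caller of set_fixmap()/__set_fixmap() funnels into the shared code in pgtable.c, and CONFIG_PARAVIRT kernels route the call through pv_mmu_ops.set_fixmap. As a rough, hedged sketch (not part of the series), a hypothetical non-Xen backend could plug into the new hook while reusing the shared native helper; the myhv_* names below are invented purely for illustration:

static void myhv_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
{
	/* A real backend might translate or audit the mapping here. */
	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, prot));
}

static const struct pv_mmu_ops myhv_mmu_ops __initdata = {
	/* ... other mmu ops ... */
	.set_fixmap = myhv_set_fixmap,
};

On CONFIG_PARAVIRT kernels, set_fixmap(idx, phys) expands to __set_fixmap(idx, phys, PAGE_KERNEL), which calls pv_mmu_ops.set_fixmap(); the native initializer keeps native_set_fixmap() there, so bare-metal behaviour is unchanged.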
From eba0045ff87bab465d3c80c289f3bf709c1800f5 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:12 -0400
Subject: x86/paravirt: add a pgd_alloc/free hooks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add hooks which are called at pgd_alloc/free time.  The pgd_alloc hook
may return an error code, which if non-zero, causes the pgd allocation
to be failed.  The hooks may be used to allocate/free auxillary per-pgd
information.

also fix:

> * Ingo Molnar <mingo@elte.hu> wrote:
>
>  include/asm/pgalloc.h: In function ‘paravirt_pgd_free':
>  include/asm/pgalloc.h:14: error: parameter name omitted
>  arch/x86/kernel/entry_64.S: In file included from
>  arch/x86/kernel/traps_64.c:51:include/asm/pgalloc.h: In function ‘paravirt_pgd_free':
>  include/asm/pgalloc.h:14: error: parameter name omitted

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt.c |  4 ++++
 arch/x86/mm/pgtable.c      | 13 ++++++++-----
 arch/x86/xen/enlighten.c   |  4 ++++
 include/asm-x86/paravirt.h | 19 ++++++++++++++++++-
 include/asm-x86/pgalloc.h  |  4 ++++
 5 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e9b504537212..78c9a1b9e6b0 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
 #include <asm/setup.h>
 #include <asm/arch_hooks.h>
 #include <asm/time.h>
+#include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/delay.h>
 #include <asm/fixmap.h>
@@ -366,6 +367,9 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.flush_tlb_single = native_flush_tlb_single,
 	.flush_tlb_others = native_flush_tlb_others,

+	.pgd_alloc = __paravirt_pgd_alloc,
+	.pgd_free = paravirt_nop,
+
 	.alloc_pte = paravirt_nop,
 	.alloc_pmd = paravirt_nop,
 	.alloc_pmd_clone = paravirt_nop,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 45b99ac39480..418c4432fb39 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -215,13 +215,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)

 	/* so that alloc_pmd can use it */
 	mm->pgd = pgd;
-	if (pgd)
+	if (pgd) {
 		pgd_ctor(pgd);

-	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		pgd_dtor(pgd);
-		free_page((unsigned long)pgd);
-		pgd = NULL;
+		if (paravirt_pgd_alloc(mm) != 0 ||
+		    !pgd_prepopulate_pmd(mm, pgd)) {
+			pgd_dtor(pgd);
+			free_page((unsigned long)pgd);
+			pgd = NULL;
+		}
 	}

 	return pgd;
@@ -231,6 +233,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	pgd_mop_up_pmds(mm, pgd);
 	pgd_dtor(pgd);
+	paravirt_pgd_free(mm, pgd);
 	free_page((unsigned long)pgd);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 76ad1efaf09e..d62f14e20708 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/reboot.h>
+#include <asm/pgalloc.h>

 #include "xen-ops.h"
 #include "mmu.h"
@@ -1153,6 +1154,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,

+	.pgd_alloc = __paravirt_pgd_alloc,
+	.pgd_free = paravirt_nop,
+
 	.alloc_pte = xen_alloc_pte_init,
 	.release_pte = xen_release_pte_init,
 	.alloc_pmd = xen_alloc_pte_init,
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 41f5447efd88..5467e2cff4bc 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -219,7 +219,14 @@ struct pv_mmu_ops {
 	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
 				 unsigned long va);

-	/* Hooks for allocating/releasing pagetable pages */
+	/* Hooks for allocating and freeing a pagetable top-level */
+	int (*pgd_alloc)(struct mm_struct *mm);
+	void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
+
+	/*
+	 * Hooks for allocating/releasing pagetable pages when they're
+	 * attached to a pagetable
+	 */
 	void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
 	void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
 	void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
@@ -925,6 +932,16 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
 }

+static inline int paravirt_pgd_alloc(struct mm_struct *mm)
+{
+	return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
+}
+
+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
+}
+
 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
 {
 	PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index 91e4641f3f31..d63ea431cb3b 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -5,9 +5,13 @@
 #include <linux/mm.h>		/* for struct page */
 #include <linux/pagemap.h>

+static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
+#define paravirt_pgd_alloc(mm)	__paravirt_pgd_alloc(mm)
+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
 static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
 static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
--
cgit v1.2.3
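Before the final patch, the contract of the two hooks just added is worth spelling out: pv_mmu_ops.pgd_alloc runs while a new pgd is being set up and a non-zero return aborts the allocation, while pv_mmu_ops.pgd_free runs as the pgd is torn down. A minimal, hedged sketch of a backend using them (not from the series; the myhv_* names are invented for illustration):

static int myhv_pgd_alloc(struct mm_struct *mm)
{
	/*
	 * Allocate or register any per-pgd auxiliary state here.
	 * Returning non-zero makes pgd_alloc() fail and the caller
	 * frees the pgd page again.
	 */
	return 0;
}

static void myhv_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	/* Release whatever myhv_pgd_alloc() attached to this pgd. */
}

Backends that need nothing here simply keep the defaults used above: __paravirt_pgd_alloc(), which always returns 0, and paravirt_nop on the free side, as both the native and the Xen pv_mmu_ops initializers do.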
From d8d5900ef8afc562088f8470feeaf17c4747790f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 25 Jun 2008 00:19:13 -0400
Subject: x86: preallocate and prepopulate separately

Jan Beulich points out that vmalloc_sync_all() assumes that the kernel's
pmd is always expected to be present in the pgd.  The current pgd
construction code will add the pgd to the pgd_list before its pmds have
been pre-populated, thereby making it visible to vmalloc_sync_all().

However, because pgd_prepopulate_pmd also does the allocation, it may
block and cannot be done under spinlock.

The solution is to preallocate the pmds out of the spinlock, then
populate them while holding the pgd_list lock.

This patch also pulls the pmd preallocation and mop-up functions out to
be common, assuming that the compiler will generate no code for them
when PREALLOCTED_PMDS is 0.  Also, there's no need for pgd_ctor to clear
the pgd again, since it's allocated as a zeroed page.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pgtable.c | 169 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 101 insertions(+), 68 deletions(-)

(limited to 'arch/x86/mm/pgtable.c')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 418c4432fb39..557b2abceef8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -66,12 +66,6 @@ static inline void pgd_list_del(pgd_t *pgd)
 static void pgd_ctor(void *p)
 {
 	pgd_t *pgd = p;
-	unsigned long flags;
-
-	/* Clear usermode parts of PGD */
-	memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
-
-	spin_lock_irqsave(&pgd_lock, flags);

 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -91,8 +85,6 @@ static void pgd_ctor(void *p)
 	/* list required to sync kernel mapping updates */
 	if (!SHARED_KERNEL_PMD)
 		pgd_list_add(pgd);
-
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }

 static void pgd_dtor(void *pgd)
@@ -119,6 +111,72 @@ static void pgd_dtor(void *pgd)
  */
 #ifdef CONFIG_X86_PAE

+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
+}
+#else  /* !CONFIG_X86_PAE */
+
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+#define PREALLOCATED_PMDS	0
+
+#endif	/* CONFIG_X86_PAE */
+
+static void free_pmds(pmd_t *pmds[])
+{
+	int i;
+
+	for(i = 0; i < PREALLOCATED_PMDS; i++)
+		if (pmds[i])
+			free_page((unsigned long)pmds[i]);
+}
+
+static int preallocate_pmds(pmd_t *pmds[])
+{
+	int i;
+	bool failed = false;
+
+	for(i = 0; i < PREALLOCATED_PMDS; i++) {
+		pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+		if (pmd == NULL)
+			failed = true;
+		pmds[i] = pmd;
+	}
+
+	if (failed) {
+		free_pmds(pmds);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Mop up any pmd pages which may still be attached to the pgd.
  * Normally they will be freed by munmap/exit_mmap, but any pmd we
@@ -129,7 +187,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 {
 	int i;

-	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+	for(i = 0; i < PREALLOCATED_PMDS; i++) {
 		pgd_t pgd = pgdp[i];

 		if (pgd_val(pgd) != 0) {
@@ -143,32 +201,17 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 	}
 }

-/*
- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
- * updating the top-level pagetable entries to guarantee the
- * processor notices the update.  Since this is expensive, and
- * all 4 top-level entries are used almost immediately in a
- * new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
- */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 {
 	pud_t *pud;
 	unsigned long addr;
 	int i;

 	pud = pud_offset(pgd, 0);
-	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-	     i++, pud++, addr += PUD_SIZE) {
-		pmd_t *pmd = pmd_alloc_one(mm, addr);
-		if (!pmd) {
-			pgd_mop_up_pmds(mm, pgd);
-			return 0;
-		}
+	for (addr = i = 0; i < PREALLOCATED_PMDS;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmds[i];

 		if (i >= KERNEL_PGD_BOUNDARY)
 			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
@@ -176,57 +219,47 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)

 		pud_populate(mm, pud, pmd);
 	}
-
-	return 1;
 }

-void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+	pgd_t *pgd;
+	pmd_t *pmds[PREALLOCATED_PMDS];
+	unsigned long flags;

-	/* Note: almost everything apart from _PAGE_PRESENT is
-	   reserved at the pmd (PDPT) level. */
-	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+	pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (pgd == NULL)
+		goto out;
+
+	mm->pgd = pgd;
+
+	if (preallocate_pmds(pmds) != 0)
+		goto out_free_pgd;
+
+	if (paravirt_pgd_alloc(mm) != 0)
+		goto out_free_pmds;

 	/*
-	 * According to Intel App note "TLBs, Paging-Structure Caches,
-	 * and Their Invalidation", April 2007, document 317080-001,
-	 * section 8.1: in PAE mode we explicitly have to flush the
-	 * TLB via cr3 if the top-level pgd is changed...
+	 * Make sure that pre-populating the pmds is atomic with
+	 * respect to anything walking the pgd_list, so that they
+	 * never see a partially populated pgd.
 	 */
-	if (mm == current->active_mm)
-		write_cr3(read_cr3());
-}
-#else  /* !CONFIG_X86_PAE */
-/* No need to prepopulate any pagetable entries in non-PAE modes. */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	return 1;
-}
-
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
-{
-}
-#endif	/* CONFIG_X86_PAE */
+	spin_lock_irqsave(&pgd_lock, flags);

-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	pgd_ctor(pgd);
+	pgd_prepopulate_pmd(mm, pgd, pmds);

-	/* so that alloc_pmd can use it */
-	mm->pgd = pgd;
-	if (pgd) {
-		pgd_ctor(pgd);
-
-		if (paravirt_pgd_alloc(mm) != 0 ||
-		    !pgd_prepopulate_pmd(mm, pgd)) {
-			pgd_dtor(pgd);
-			free_page((unsigned long)pgd);
-			pgd = NULL;
-		}
-	}
+	spin_unlock_irqrestore(&pgd_lock, flags);

 	return pgd;
+
+out_free_pmds:
+	free_pmds(pmds);
+out_free_pgd:
+	free_page((unsigned long)pgd);
+out:
+	return NULL;
 }

 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
--
cgit v1.2.3
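Taken together, the last patch reorders pgd_alloc() so that everything that can sleep happens before pgd_lock is taken, and the pgd only becomes visible on pgd_list once its kernel pmds are in place. A hedged summary of the resulting ordering (a paraphrase of the code above, not a drop-in replacement):

/*
 * pgd_alloc() after this series, in outline:
 *
 *	pgd = __get_free_page(GFP_KERNEL | __GFP_ZERO);	(may sleep)
 *	preallocate_pmds(pmds);			(may sleep, outside pgd_lock)
 *	paravirt_pgd_alloc(mm);			(backend hook, may fail)
 *
 *	spin_lock_irqsave(&pgd_lock, flags);
 *	pgd_ctor(pgd);				(copies kernel entries and, if
 *						 !SHARED_KERNEL_PMD, adds the
 *						 pgd to pgd_list)
 *	pgd_prepopulate_pmd(mm, pgd, pmds);	(installs the preallocated pmds)
 *	spin_unlock_irqrestore(&pgd_lock, flags);
 *
 * Because the ctor and the prepopulation both run under pgd_lock, anything
 * walking pgd_list (such as vmalloc_sync_all()) never observes a pgd whose
 * kernel pmds are missing, which is the race described in the commit message.
 */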