summaryrefslogtreecommitdiff
path: root/arch/x86/xen/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--arch/x86/xen/mmu.c458
1 files changed, 83 insertions, 375 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 55c965b38c27..dc708dcc62f1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -75,67 +75,12 @@
#include "mmu.h"
#include "debugfs.h"
-#define MMU_UPDATE_HISTO 30
-
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and balloon lists.
*/
DEFINE_SPINLOCK(xen_reservation_lock);
-#ifdef CONFIG_XEN_DEBUG_FS
-
-static struct {
- u32 pgd_update;
- u32 pgd_update_pinned;
- u32 pgd_update_batched;
-
- u32 pud_update;
- u32 pud_update_pinned;
- u32 pud_update_batched;
-
- u32 pmd_update;
- u32 pmd_update_pinned;
- u32 pmd_update_batched;
-
- u32 pte_update;
- u32 pte_update_pinned;
- u32 pte_update_batched;
-
- u32 mmu_update;
- u32 mmu_update_extended;
- u32 mmu_update_histo[MMU_UPDATE_HISTO];
-
- u32 prot_commit;
- u32 prot_commit_batched;
-
- u32 set_pte_at;
- u32 set_pte_at_batched;
- u32 set_pte_at_pinned;
- u32 set_pte_at_current;
- u32 set_pte_at_kernel;
-} mmu_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
- if (unlikely(zero_stats)) {
- memset(&mmu_stats, 0, sizeof(mmu_stats));
- zero_stats = 0;
- }
-}
-
-#define ADD_STATS(elem, val) \
- do { check_zero(); mmu_stats.elem += (val); } while(0)
-
-#else /* !CONFIG_XEN_DEBUG_FS */
-
-#define ADD_STATS(elem, val) do { (void)(val); } while(0)
-
-#endif /* CONFIG_XEN_DEBUG_FS */
-
-
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
@@ -243,11 +188,6 @@ static bool xen_page_pinned(void *ptr)
return PagePinned(page);
}
-static bool xen_iomap_pte(pte_t pte)
-{
- return pte_flags(pte) & _PAGE_IOMAP;
-}
-
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
struct multicall_space mcs;
@@ -257,7 +197,7 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
u = mcs.args;
/* ptep might be kmapped when using 32-bit HIGHPTE */
- u->ptr = arbitrary_virt_to_machine(ptep).maddr;
+ u->ptr = virt_to_machine(ptep).maddr;
u->val = pte_val_ma(pteval);
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
@@ -266,11 +206,6 @@ void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
}
EXPORT_SYMBOL_GPL(xen_set_domain_pte);
-static void xen_set_iomap_pte(pte_t *ptep, pte_t pteval)
-{
- xen_set_domain_pte(ptep, pteval, DOMID_IO);
-}
-
static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
@@ -279,27 +214,17 @@ static void xen_extend_mmu_update(const struct mmu_update *update)
mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
if (mcs.mc != NULL) {
- ADD_STATS(mmu_update_extended, 1);
- ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
-
mcs.mc->args[1]++;
-
- if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
- ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
- else
- ADD_STATS(mmu_update_histo[0], 1);
} else {
- ADD_STATS(mmu_update, 1);
mcs = __xen_mc_entry(sizeof(*u));
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
- ADD_STATS(mmu_update_histo[1], 1);
}
u = mcs.args;
*u = *update;
}
-void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
struct mmu_update u;
@@ -312,17 +237,13 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
u.val = pmd_val_ma(val);
xen_extend_mmu_update(&u);
- ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
-void xen_set_pmd(pmd_t *ptr, pmd_t val)
+static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
- ADD_STATS(pmd_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -330,8 +251,6 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val)
return;
}
- ADD_STATS(pmd_update_pinned, 1);
-
xen_set_pmd_hyper(ptr, val);
}
@@ -344,35 +263,34 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}
-void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
+static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
- if (xen_iomap_pte(pteval)) {
- xen_set_iomap_pte(ptep, pteval);
- goto out;
- }
+ struct mmu_update u;
- ADD_STATS(set_pte_at, 1);
-// ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
- ADD_STATS(set_pte_at_current, mm == current->mm);
- ADD_STATS(set_pte_at_kernel, mm == &init_mm);
+ if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
+ return false;
- if (mm == current->mm || mm == &init_mm) {
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- struct multicall_space mcs;
- mcs = xen_mc_entry(0);
+ xen_mc_batch();
- MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
- ADD_STATS(set_pte_at_batched, 1);
- xen_mc_issue(PARAVIRT_LAZY_MMU);
- goto out;
- } else
- if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
- goto out;
- }
- xen_set_pte(ptep, pteval);
+ u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
+ u.val = pte_val_ma(pteval);
+ xen_extend_mmu_update(&u);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ return true;
+}
+
+static void xen_set_pte(pte_t *ptep, pte_t pteval)
+{
+ if (!xen_batched_set_pte(ptep, pteval))
+ native_set_pte(ptep, pteval);
+}
-out: return;
+static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval)
+{
+ xen_set_pte(ptep, pteval);
}
pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -389,13 +307,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
xen_mc_batch();
- u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+ u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
u.val = pte_val_ma(pte);
xen_extend_mmu_update(&u);
- ADD_STATS(prot_commit, 1);
- ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
@@ -463,7 +378,7 @@ static pteval_t iomap_pte(pteval_t val)
return val;
}
-pteval_t xen_pte_val(pte_t pte)
+static pteval_t xen_pte_val(pte_t pte)
{
pteval_t pteval = pte.pte;
@@ -480,7 +395,7 @@ pteval_t xen_pte_val(pte_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
-pgdval_t xen_pgd_val(pgd_t pgd)
+static pgdval_t xen_pgd_val(pgd_t pgd)
{
return pte_mfn_to_pfn(pgd.pgd);
}
@@ -511,7 +426,7 @@ void xen_set_pat(u64 pat)
WARN_ON(pat != 0x0007010600070106ull);
}
-pte_t xen_make_pte(pteval_t pte)
+static pte_t xen_make_pte(pteval_t pte)
{
phys_addr_t addr = (pte & PTE_PFN_MASK);
@@ -581,20 +496,20 @@ pte_t xen_make_pte_debug(pteval_t pte)
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
#endif
-pgd_t xen_make_pgd(pgdval_t pgd)
+static pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
return native_make_pgd(pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
-pmdval_t xen_pmd_val(pmd_t pmd)
+static pmdval_t xen_pmd_val(pmd_t pmd)
{
return pte_mfn_to_pfn(pmd.pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
-void xen_set_pud_hyper(pud_t *ptr, pud_t val)
+static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
struct mmu_update u;
@@ -607,17 +522,13 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
u.val = pud_val_ma(val);
xen_extend_mmu_update(&u);
- ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
-void xen_set_pud(pud_t *ptr, pud_t val)
+static void xen_set_pud(pud_t *ptr, pud_t val)
{
- ADD_STATS(pud_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -625,56 +536,28 @@ void xen_set_pud(pud_t *ptr, pud_t val)
return;
}
- ADD_STATS(pud_update_pinned, 1);
-
xen_set_pud_hyper(ptr, val);
}
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- if (xen_iomap_pte(pte)) {
- xen_set_iomap_pte(ptep, pte);
- return;
- }
-
- ADD_STATS(pte_update, 1);
-// ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
- ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
-#ifdef CONFIG_X86_PAE
- ptep->pte_high = pte.pte_high;
- smp_wmb();
- ptep->pte_low = pte.pte_low;
-#else
- *ptep = pte;
-#endif
-}
-
#ifdef CONFIG_X86_PAE
-void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
- if (xen_iomap_pte(pte)) {
- xen_set_iomap_pte(ptep, pte);
- return;
- }
-
set_64bit((u64 *)ptep, native_pte_val(pte));
}
-void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- ptep->pte_low = 0;
- smp_wmb(); /* make sure low gets written first */
- ptep->pte_high = 0;
+ if (!xen_batched_set_pte(ptep, native_make_pte(0)))
+ native_pte_clear(mm, addr, ptep);
}
-void xen_pmd_clear(pmd_t *pmdp)
+static void xen_pmd_clear(pmd_t *pmdp)
{
set_pmd(pmdp, __pmd(0));
}
#endif /* CONFIG_X86_PAE */
-pmd_t xen_make_pmd(pmdval_t pmd)
+static pmd_t xen_make_pmd(pmdval_t pmd)
{
pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
@@ -682,13 +565,13 @@ pmd_t xen_make_pmd(pmdval_t pmd)
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
#if PAGETABLE_LEVELS == 4
-pudval_t xen_pud_val(pud_t pud)
+static pudval_t xen_pud_val(pud_t pud)
{
return pte_mfn_to_pfn(pud.pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
-pud_t xen_make_pud(pudval_t pud)
+static pud_t xen_make_pud(pudval_t pud)
{
pud = pte_pfn_to_mfn(pud);
@@ -696,7 +579,7 @@ pud_t xen_make_pud(pudval_t pud)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
-pgd_t *xen_get_user_pgd(pgd_t *pgd)
+static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
unsigned offset = pgd - pgd_page;
@@ -728,7 +611,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
* 2. It is always pinned
* 3. It has no user pagetable attached to it
*/
-void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
{
preempt_disable();
@@ -741,12 +624,10 @@ void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
preempt_enable();
}
-void xen_set_pgd(pgd_t *ptr, pgd_t val)
+static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
- ADD_STATS(pgd_update, 1);
-
/* If page is not pinned, we can just update the entry
directly */
if (!xen_page_pinned(ptr)) {
@@ -758,9 +639,6 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
return;
}
- ADD_STATS(pgd_update_pinned, 1);
- ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
-
/* If it's pinned, then we can at least batch the kernel and
user updates together. */
xen_mc_batch();
@@ -1054,7 +932,7 @@ void xen_mm_pin_all(void)
* that's before we have page structures to store the bits. So do all
* the book-keeping now.
*/
-static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
SetPagePinned(page);
@@ -1162,14 +1040,14 @@ void xen_mm_unpin_all(void)
spin_unlock(&pgd_lock);
}
-void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
+static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
xen_pgd_pin(next);
spin_unlock(&next->page_table_lock);
}
-void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
spin_lock(&mm->page_table_lock);
xen_pgd_pin(mm);
@@ -1187,7 +1065,7 @@ static void drop_other_mm_ref(void *info)
active_mm = percpu_read(cpu_tlbstate.active_mm);
- if (active_mm == mm)
+ if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
@@ -1256,7 +1134,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
* pagetable because of lazy tlb flushing. This means we need need to
* switch all CPUs off this pagetable before we can unpin it.
*/
-void xen_exit_mmap(struct mm_struct *mm)
+static void xen_exit_mmap(struct mm_struct *mm)
{
get_cpu(); /* make sure we don't move around */
xen_drop_mm_ref(mm);
@@ -1271,13 +1149,27 @@ void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
-static __init void xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(pgd_t *base)
{
}
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ native_pagetable_reserve(start, end);
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static void __init xen_pagetable_setup_done(pgd_t *base)
{
xen_setup_shared_info();
xen_post_allocator_init();
@@ -1463,119 +1355,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
return ret;
}
-#ifdef CONFIG_X86_64
-static __initdata u64 __last_pgt_set_rw = 0;
-static __initdata u64 __pgt_buf_start = 0;
-static __initdata u64 __pgt_buf_end = 0;
-static __initdata u64 __pgt_buf_top = 0;
-/*
- * As a consequence of the commit:
- *
- * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
- * Author: Yinghai Lu <yinghai@kernel.org>
- * Date: Fri Dec 17 16:58:28 2010 -0800
- *
- * x86-64, mm: Put early page table high
- *
- * at some point init_memory_mapping is going to reach the pagetable pages
- * area and map those pages too (mapping them as normal memory that falls
- * in the range of addresses passed to init_memory_mapping as argument).
- * Some of those pages are already pagetable pages (they are in the range
- * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
- * everything is fine.
- * Some of these pages are not pagetable pages yet (they fall in the range
- * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
- * are going to be mapped RW. When these pages become pagetable pages and
- * are hooked into the pagetable, xen will find that the guest has already
- * a RW mapping of them somewhere and fail the operation.
- * The reason Xen requires pagetables to be RO is that the hypervisor needs
- * to verify that the pagetables are valid before using them. The validation
- * operations are called "pinning".
- *
- * In order to fix the issue we mark all the pages in the entire range
- * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
- * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
- * init_memory_mapping. Hence the kernel is going to crash as soon as one
- * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
- * ranges are RO).
- *
- * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
- * the init_memory_mapping has completed (in a perfect world we would
- * call this function from init_memory_mapping, but lets ignore that).
- *
- * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
- * end,top] have all changed to new values (b/c init_memory_mapping
- * is called and setting up another new page-table). Hence, the first time
- * we enter this function, we save away the pgt_buf_start value and update
- * the pgt_buf_[end,top].
- *
- * When we detect that the "old" pgt_buf_start through pgt_buf_end
- * PFNs have been reserved (so memblock_x86_reserve_range has been called),
- * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
- *
- * And then we update those "old" pgt_buf_[end|top] with the new ones
- * so that we can redo this on the next pagetable.
- */
-static __init void mark_rw_past_pgt(void) {
-
- if (pgt_buf_end > pgt_buf_start) {
- u64 addr, size;
-
- /* Save it away. */
- if (!__pgt_buf_start) {
- __pgt_buf_start = pgt_buf_start;
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- return;
- }
- /* If we get the range that starts at __pgt_buf_end that means
- * the range is reserved, and that in 'init_memory_mapping'
- * the 'memblock_x86_reserve_range' has been called with the
- * outdated __pgt_buf_start, __pgt_buf_end (the "new"
- * pgt_buf_[start|end|top] refer now to a new pagetable.
- * Note: we are called _after_ the pgt_buf_[..] have been
- * updated.*/
-
- addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
- &size, PAGE_SIZE);
-
- /* Still not reserved, meaning 'memblock_x86_reserve_range'
- * hasn't been called yet. Update the _end and _top.*/
- if (addr == PFN_PHYS(__pgt_buf_start)) {
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- return;
- }
-
- /* OK, the area is reserved, meaning it is time for us to
- * set RW for the old end->top PFNs. */
-
- /* ..unless we had already done this. */
- if (__pgt_buf_end == __last_pgt_set_rw)
- return;
-
- addr = PFN_PHYS(__pgt_buf_end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
- PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
-
- while (addr < PFN_PHYS(__pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(addr));
- addr += PAGE_SIZE;
- }
- /* And update everything so that we are ready for the next
- * pagetable (the one created for regions past 4GB) */
- __last_pgt_set_rw = __pgt_buf_end;
- __pgt_buf_start = pgt_buf_start;
- __pgt_buf_end = pgt_buf_end;
- __pgt_buf_top = pgt_buf_top;
- }
- return;
-}
-#else
-static __init void mark_rw_past_pgt(void) { }
-#endif
static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
@@ -1587,7 +1366,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
}
#ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
if (pte_val_ma(*ptep) & _PAGE_PRESENT)
@@ -1597,19 +1376,11 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
return pte;
}
#else /* CONFIG_X86_64 */
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
/*
- * A bit of optimization. We do not need to call the workaround
- * when xen_set_pte_init is called with a PTE with 0 as PFN.
- * That is b/c the pagetable at that point are just being populated
- * with empty values and we can save some cycles by not calling
- * the 'memblock' code.*/
- if (pfn)
- mark_rw_past_pgt();
- /*
* If the new pfn is within the range of the newly allocated
* kernel pagetable, and it isn't being mapped into an
* early_ioremap fixmap slot as a freshly allocated page, make sure
@@ -1626,7 +1397,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
/* Init-time set_pte while constructing initial pagetables, which
doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
pte = mask_rw_pte(ptep, pte);
@@ -1644,7 +1415,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
@@ -1654,7 +1425,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
}
/* Used for pmd and pud */
-static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
@@ -1664,13 +1435,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
-static __init void xen_release_pte_init(unsigned long pfn)
+static void __init xen_release_pte_init(unsigned long pfn)
{
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static __init void xen_release_pmd_init(unsigned long pfn)
+static void __init xen_release_pmd_init(unsigned long pfn)
{
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@@ -1796,7 +1567,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
BUG();
}
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
unsigned pmdidx, pteidx;
unsigned ident_pte;
@@ -1879,7 +1650,7 @@ static void convert_pfn_mfn(void *v)
* of the physical mapping once some sort of allocator has been set
* up.
*/
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pud_t *l3;
@@ -1950,7 +1721,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
-static __init void xen_write_cr3_init(unsigned long cr3)
+static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
@@ -1987,7 +1758,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
pv_mmu_ops.write_cr3 = &xen_write_cr3;
}
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pmd_t *kernel_pmd;
@@ -2093,7 +1864,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
-__init void xen_ident_map_ISA(void)
+void __init xen_ident_map_ISA(void)
{
unsigned long pa;
@@ -2116,10 +1887,8 @@ __init void xen_ident_map_ISA(void)
xen_flush_tlb();
}
-static __init void xen_post_allocator_init(void)
+static void __init xen_post_allocator_init(void)
{
- mark_rw_past_pgt();
-
#ifdef CONFIG_XEN_DEBUG
pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
#endif
@@ -2155,7 +1924,7 @@ static void xen_leave_lazy_mmu(void)
preempt_enable();
}
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.read_cr2 = xen_read_cr2,
.write_cr2 = xen_write_cr2,
@@ -2228,6 +1997,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
@@ -2479,7 +2249,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
- rmd->mmu_update->ptr = arbitrary_virt_to_machine(ptep).maddr;
+ rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
rmd->mmu_update++;
@@ -2533,7 +2303,6 @@ out:
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
-
static int p2m_dump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, p2m_dump_show, NULL);
@@ -2545,65 +2314,4 @@ static const struct file_operations p2m_dump_fops = {
.llseek = seq_lseek,
.release = single_release,
};
-
-static struct dentry *d_mmu_debug;
-
-static int __init xen_mmu_debugfs(void)
-{
- struct dentry *d_xen = xen_init_debugfs();
-
- if (d_xen == NULL)
- return -ENOMEM;
-
- d_mmu_debug = debugfs_create_dir("mmu", d_xen);
-
- debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
-
- debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
- debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pgd_update_pinned);
- debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pgd_update_pinned);
-
- debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
- debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pud_update_pinned);
- debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pud_update_pinned);
-
- debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
- debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
- &mmu_stats.pmd_update_pinned);
- debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pmd_update_pinned);
-
- debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
-// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
-// &mmu_stats.pte_update_pinned);
- debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
- &mmu_stats.pte_update_pinned);
-
- debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
- debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
- &mmu_stats.mmu_update_extended);
- xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
- mmu_stats.mmu_update_histo, 20);
-
- debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
- debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_batched);
- debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_current);
- debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
- &mmu_stats.set_pte_at_kernel);
-
- debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
- debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
- &mmu_stats.prot_commit_batched);
-
- debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
- return 0;
-}
-fs_initcall(xen_mmu_debugfs);
-
-#endif /* CONFIG_XEN_DEBUG_FS */
+#endif /* CONFIG_XEN_DEBUG_FS */