diff options
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r-- | arch/x86/xen/mmu.c | 168 |
1 files changed, 127 insertions, 41 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4fd7387222bf..02d752460371 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/seq_file.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -78,8 +79,7 @@ /* * Protects atomic reservation decrease/increase against concurrent increases. - * Also protects non-atomic updates of current_pages and driver_pages, and - * balloon lists. + * Also protects non-atomic updates of current_pages and balloon lists. */ DEFINE_SPINLOCK(xen_reservation_lock); @@ -416,8 +416,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn = pfn_to_mfn(pfn); + unsigned long mfn; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + mfn = get_phys_to_machine(pfn); + else + mfn = pfn; /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -427,8 +431,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; + } else { + /* + * Paramount to do this test _after_ the + * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & + * IDENTITY_FRAME_BIT resolves to true. + */ + mfn &= ~FOREIGN_FRAME_BIT; + if (mfn & IDENTITY_FRAME_BIT) { + mfn &= ~IDENTITY_FRAME_BIT; + flags |= _PAGE_IOMAP; + } } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } @@ -532,6 +546,41 @@ pte_t xen_make_pte(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); +#ifdef CONFIG_XEN_DEBUG +pte_t xen_make_pte_debug(pteval_t pte) +{ + phys_addr_t addr = (pte & PTE_PFN_MASK); + phys_addr_t other_addr; + bool io_page = false; + pte_t _pte; + + if (pte & _PAGE_IOMAP) + io_page = true; + + _pte = xen_make_pte(pte); + + if (!addr) + return _pte; + + if (io_page && + (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { + other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; + WARN_ONCE(addr != other_addr, + "0x%lx is using VM_IO, but it is 0x%lx!\n", + (unsigned long)addr, (unsigned long)other_addr); + } else { + pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; + other_addr = (_pte.pte & PTE_PFN_MASK); + WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set), + "0x%lx is missing VM_IO (and wasn't fixed)!\n", + (unsigned long)addr); + } + + return _pte; +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); +#endif + pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); @@ -986,10 +1035,9 @@ static void xen_pgd_pin(struct mm_struct *mm) */ void xen_mm_pin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) { @@ -998,7 +1046,7 @@ void xen_mm_pin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -1006,7 +1054,7 @@ void xen_mm_pin_all(void) * that's before we have page structures to store the bits. So do all * the book-keeping now. */ -static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, +static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, enum pt_level level) { SetPagePinned(page); @@ -1099,10 +1147,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) */ void xen_mm_unpin_all(void) { - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { @@ -1112,7 +1159,7 @@ void xen_mm_unpin_all(void) } } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) @@ -1224,13 +1271,27 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static __init void xen_pagetable_setup_start(pgd_t *base) +static void __init xen_pagetable_setup_start(pgd_t *base) { } +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) +{ + /* reserve the range used */ + native_pagetable_reserve(start, end); + + /* set as RW the rest */ + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, + PFN_PHYS(pgt_buf_top)); + while (end < PFN_PHYS(pgt_buf_top)) { + make_lowmem_page_readwrite(__va(end)); + end += PAGE_SIZE; + } +} + static void xen_post_allocator_init(void); -static __init void xen_pagetable_setup_done(pgd_t *base) +static void __init xen_pagetable_setup_done(pgd_t *base) { xen_setup_shared_info(); xen_post_allocator_init(); @@ -1426,32 +1487,39 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - #ifdef CONFIG_X86_32 +static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) +{ /* If there's an existing pte, then don't allow _PAGE_RW to be set */ if (pte_val_ma(*ptep) & _PAGE_PRESENT) pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & pte_val_ma(pte)); -#endif + + return pte; +} +#else /* CONFIG_X86_64 */ +static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot, make sure it is RO. + * early_ioremap fixmap slot as a freshly allocated page, make sure + * it is RO. */ - if (!is_early_ioremap_ptep(ptep) && - pfn >= e820_table_start && pfn < e820_table_end) + if (((!is_early_ioremap_ptep(ptep) && + pfn >= pgt_buf_start && pfn < pgt_buf_top)) || + (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) pte = pte_wrprotect(pte); return pte; } +#endif /* CONFIG_X86_64 */ /* Init-time set_pte while constructing initial pagetables, which doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { pte = mask_rw_pte(ptep, pte); @@ -1469,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ @@ -1479,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) } /* Used for pmd and pud */ -static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) { #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ @@ -1489,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static __init void xen_release_pte_init(unsigned long pfn) +static void __init xen_release_pte_init(unsigned long pfn) { pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static __init void xen_release_pmd_init(unsigned long pfn) +static void __init xen_release_pmd_init(unsigned long pfn) { make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } @@ -1621,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot) BUG(); } -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; unsigned ident_pte; @@ -1653,9 +1721,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - if (!pte_none(pte_page[pteidx])) continue; @@ -1697,7 +1762,7 @@ static void convert_pfn_mfn(void *v) } /* - * Set up the inital kernel pagetable. + * Set up the initial kernel pagetable. * * We can construct this by grafting the Xen provided pagetable into * head_64.S's preconstructed pagetables. We copy the Xen L2's into @@ -1707,12 +1772,18 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + /* max_pfn_mapped is the last pfn mapped in the initial memory + * mappings. Considering that on Xen after the kernel mappings we + * have the mappings of some pages that don't exist in pfn space, we + * set max_pfn_mapped to the last real pfn mapped. */ + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1772,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); -static __init void xen_write_cr3_init(unsigned long cr3) +static void __init xen_write_cr3_init(unsigned long cr3) { unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); @@ -1809,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1817,9 +1888,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); @@ -1917,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } -__init void xen_ident_map_ISA(void) +void __init xen_ident_map_ISA(void) { unsigned long pa; @@ -1940,8 +2009,11 @@ __init void xen_ident_map_ISA(void) xen_flush_tlb(); } -static __init void xen_post_allocator_init(void) +static void __init xen_post_allocator_init(void) { +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -1974,7 +2046,7 @@ static void xen_leave_lazy_mmu(void) preempt_enable(); } -static const struct pv_mmu_ops xen_mmu_ops __initdata = { +static const struct pv_mmu_ops xen_mmu_ops __initconst = { .read_cr2 = xen_read_cr2, .write_cr2 = xen_write_cr2, @@ -2047,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { void __init xen_init_mmu_ops(void) { + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; @@ -2074,7 +2147,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, in_frames[i] = virt_to_mfn(vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); if (out_frames) out_frames[i] = virt_to_pfn(vaddr); @@ -2353,6 +2426,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); #ifdef CONFIG_XEN_DEBUG_FS +static int p2m_dump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, p2m_dump_show, NULL); +} + +static const struct file_operations p2m_dump_fops = { + .open = p2m_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *d_mmu_debug; static int __init xen_mmu_debugfs(void) @@ -2408,6 +2493,7 @@ static int __init xen_mmu_debugfs(void) debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, &mmu_stats.prot_commit_batched); + debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); return 0; } fs_initcall(xen_mmu_debugfs); |