summaryrefslogtreecommitdiff
path: root/arch/x86/xen/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--arch/x86/xen/mmu.c168
1 files changed, 127 insertions, 41 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4fd7387222bf..02d752460371 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -78,8 +79,7 @@
/*
* Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
+ * Also protects non-atomic updates of current_pages and balloon lists.
*/
DEFINE_SPINLOCK(xen_reservation_lock);
@@ -416,8 +416,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +431,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
@@ -532,6 +546,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN_ONCE(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1035,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -998,7 +1046,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1006,7 +1054,7 @@ void xen_mm_pin_all(void)
* that's before we have page structures to store the bits. So do all
* the book-keeping now.
*/
-static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
SetPagePinned(page);
@@ -1099,10 +1147,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1112,7 +1159,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1224,13 +1271,27 @@ void xen_exit_mmap(struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
-static __init void xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(pgd_t *base)
{
}
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ native_pagetable_reserve(start, end);
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static void __init xen_pagetable_setup_done(pgd_t *base)
{
xen_setup_shared_info();
xen_post_allocator_init();
@@ -1426,32 +1487,39 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
#endif
}
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
-{
- unsigned long pfn = pte_pfn(pte);
-
#ifdef CONFIG_X86_32
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
if (pte_val_ma(*ptep) & _PAGE_PRESENT)
pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
pte_val_ma(pte));
-#endif
+
+ return pte;
+}
+#else /* CONFIG_X86_64 */
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+{
+ unsigned long pfn = pte_pfn(pte);
/*
* If the new pfn is within the range of the newly allocated
* kernel pagetable, and it isn't being mapped into an
- * early_ioremap fixmap slot, make sure it is RO.
+ * early_ioremap fixmap slot as a freshly allocated page, make sure
+ * it is RO.
*/
- if (!is_early_ioremap_ptep(ptep) &&
- pfn >= e820_table_start && pfn < e820_table_end)
+ if (((!is_early_ioremap_ptep(ptep) &&
+ pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
+ (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
pte = pte_wrprotect(pte);
return pte;
}
+#endif /* CONFIG_X86_64 */
/* Init-time set_pte while constructing initial pagetables, which
doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
pte = mask_rw_pte(ptep, pte);
@@ -1469,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
@@ -1479,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
}
/* Used for pmd and pud */
-static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
@@ -1489,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
-static __init void xen_release_pte_init(unsigned long pfn)
+static void __init xen_release_pte_init(unsigned long pfn)
{
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static __init void xen_release_pmd_init(unsigned long pfn)
+static void __init xen_release_pmd_init(unsigned long pfn)
{
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@@ -1621,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
BUG();
}
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
unsigned pmdidx, pteidx;
unsigned ident_pte;
@@ -1653,9 +1721,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
pte_t pte;
- if (pfn > max_pfn_mapped)
- max_pfn_mapped = pfn;
-
if (!pte_none(pte_page[pteidx]))
continue;
@@ -1697,7 +1762,7 @@ static void convert_pfn_mfn(void *v)
}
/*
- * Set up the inital kernel pagetable.
+ * Set up the initial kernel pagetable.
*
* We can construct this by grafting the Xen provided pagetable into
* head_64.S's preconstructed pagetables. We copy the Xen L2's into
@@ -1707,12 +1772,18 @@ static void convert_pfn_mfn(void *v)
* of the physical mapping once some sort of allocator has been set
* up.
*/
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pud_t *l3;
pmd_t *l2;
+ /* max_pfn_mapped is the last pfn mapped in the initial memory
+ * mappings. Considering that on Xen after the kernel mappings we
+ * have the mappings of some pages that don't exist in pfn space, we
+ * set max_pfn_mapped to the last real pfn mapped. */
+ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
+
/* Zap identity mapping */
init_level4_pgt[0] = __pgd(0);
@@ -1772,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
-static __init void xen_write_cr3_init(unsigned long cr3)
+static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
@@ -1809,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
pv_mmu_ops.write_cr3 = &xen_write_cr3;
}
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
{
pmd_t *kernel_pmd;
@@ -1817,9 +1888,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
initial_kernel_pmd =
extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
- max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
- xen_start_info->nr_pt_frames * PAGE_SIZE +
- 512*1024);
+ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
@@ -1917,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
}
-__init void xen_ident_map_ISA(void)
+void __init xen_ident_map_ISA(void)
{
unsigned long pa;
@@ -1940,8 +2009,11 @@ __init void xen_ident_map_ISA(void)
xen_flush_tlb();
}
-static __init void xen_post_allocator_init(void)
+static void __init xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -1974,7 +2046,7 @@ static void xen_leave_lazy_mmu(void)
preempt_enable();
}
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.read_cr2 = xen_read_cr2,
.write_cr2 = xen_write_cr2,
@@ -2047,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
@@ -2074,7 +2147,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2426,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2493,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);