diff options
Diffstat (limited to 'arch/x86/xen/p2m.c')
-rw-r--r-- | arch/x86/xen/p2m.c | 1172 |
1 files changed, 451 insertions, 721 deletions
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b456b048eca9..edbc7a63fd73 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -3,21 +3,22 @@ * guests themselves, but it must also access and update the p2m array * during suspend/resume when all the pages are reallocated. * - * The p2m table is logically a flat array, but we implement it as a - * three-level tree to allow the address space to be sparse. + * The logical flat p2m table is mapped to a linear kernel memory area. + * For accesses by Xen a three-level tree linked via mfns only is set up to + * allow the address space to be sparse. * - * Xen - * | - * p2m_top p2m_top_mfn - * / \ / \ - * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn - * / \ / \ / / - * p2m p2m p2m p2m p2m p2m p2m ... + * Xen + * | + * p2m_top_mfn + * / \ + * p2m_mid_mfn p2m_mid_mfn + * / / + * p2m p2m p2m ... * * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. * - * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the - * maximum representable pseudo-physical address space is: + * The p2m_top_mfn level is limited to 1 page, so the maximum representable + * pseudo-physical address space is: * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages * * P2M_PER_PAGE depends on the architecture, as a mfn is always @@ -30,6 +31,9 @@ * leaf entries, or for the top root, or middle one, for which there is a void * entry, we assume it is "missing". So (for example) * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. + * We have a dedicated page p2m_missing with all entries being + * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m + * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. * * We also have the possibility of setting 1-1 mappings on certain regions, so * that: @@ -39,122 +43,20 @@ * PCI BARs, or ACPI spaces), we can create mappings easily because we * get the PFN value to match the MFN. * - * For this to work efficiently we have one new page p2m_identity and - * allocate (via reserved_brk) any other pages we need to cover the sides - * (1GB or 4MB boundary violations). All entries in p2m_identity are set to - * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, - * no other fancy value). + * For this to work efficiently we have one new page p2m_identity. All entries + * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only + * recognizes that and MFNs, no other fancy value). * * On lookup we spot that the entry points to p2m_identity and return the * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. * If the entry points to an allocated page, we just proceed as before and - * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in * appropriate functions (pfn_to_mfn). * * The reason for having the IDENTITY_FRAME_BIT instead of just returning the * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a * non-identity pfn. To protect ourselves against we elect to set (and get) the * IDENTITY_FRAME_BIT on all identity mapped PFNs. - * - * This simplistic diagram is used to explain the more subtle piece of code. - * There is also a digram of the P2M at the end that can help. - * Imagine your E820 looking as so: - * - * 1GB 2GB 4GB - * /-------------------+---------\/----\ /----------\ /---+-----\ - * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | - * \-------------------+---------/\----/ \----------/ \---+-----/ - * ^- 1029MB ^- 2001MB - * - * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), - * 2048MB = 524288 (0x80000)] - * - * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB - * is actually not present (would have to kick the balloon driver to put it in). - * - * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: - * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start - * of the PFN and the end PFN (263424 and 512256 respectively). The first step - * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page - * covers 512^2 of page estate (1GB) and in case the start or end PFN is not - * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as - * required to split any existing p2m_mid_missing middle pages. - * - * With the E820 example above, 263424 is not 1GB aligned so we allocate a - * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. - * Each entry in the allocate page is "missing" (points to p2m_missing). - * - * Next stage is to determine if we need to do a more granular boundary check - * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. - * We check if the start pfn and end pfn violate that boundary check, and if - * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer - * granularity of setting which PFNs are missing and which ones are identity. - * In our example 263424 and 512256 both fail the check so we reserve_brk two - * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" - * values) and assign them to p2m[1][2] and p2m[1][488] respectively. - * - * At this point we would at minimum reserve_brk one page, but could be up to - * three. Each call to set_phys_range_identity has at maximum a three page - * cost. If we were to query the P2M at this stage, all those entries from - * start PFN through end PFN (so 1029MB -> 2001MB) would return - * INVALID_P2M_ENTRY ("missing"). - * - * The next step is to walk from the start pfn to the end pfn setting - * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. - * If we find that the middle entry is pointing to p2m_missing we can swap it - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and - * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). - * At this point we do not need to worry about boundary aligment (so no need to - * reserve_brk a middle page, figure out which PFNs are "missing" and which - * ones are identity), as that has been done earlier. If we find that the - * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference - * that page (which covers 512 PFNs) and set the appropriate PFN with - * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we - * set from p2m[1][2][256->511] and p2m[1][488][0->256] with - * IDENTITY_FRAME_BIT set. - * - * All other regions that are void (or not filled) either point to p2m_missing - * (considered missing) or have the default value of INVALID_P2M_ENTRY (also - * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] - * contain the INVALID_P2M_ENTRY value and are considered "missing." - * - * Finally, the region beyond the end of of the E820 (4 GB in this example) - * is set to be identity (in case there are MMIO regions placed here). - * - * This is what the p2m ends up looking (for the E820 above) with this - * fabulous drawing: - * - * p2m /--------------\ - * /-----\ | &mfn_list[0],| /-----------------\ - * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | - * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | - * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | - * |-----| \ | [p2m_identity]+\\ | .... | - * | 2 |--\ \-------------------->| ... | \\ \----------------/ - * |-----| \ \---------------/ \\ - * | 3 |-\ \ \\ p2m_identity [1] - * |-----| \ \-------------------->/---------------\ /-----------------\ - * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | - * \-----/ | | | [p2m_identity]+-->| ..., ~0 | - * | | | .... | \-----------------/ - * | | +-[x], ~0, ~0.. +\ - * | | \---------------/ \ - * | | \-> /---------------\ - * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | - * | /-----------------\ /------------\ | IDENTITY[@256]| - * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | - * | | [p2m_missing] +---->| ..., ~0 | \---------------/ - * | | ... | \------------/ - * | \-----------------/ - * | - * | p2m_mid_identity - * | /-----------------\ - * \-->| [p2m_identity] +---->[1] - * | [p2m_identity] +---->[1] - * | ... | - * \-----------------/ - * - * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ #include <linux/init.h> @@ -164,9 +66,11 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/bootmem.h> +#include <linux/slab.h> #include <asm/cache.h> #include <asm/setup.h> +#include <asm/uaccess.h> #include <asm/xen/page.h> #include <asm/xen/hypercall.h> @@ -178,31 +82,26 @@ #include "multicalls.h" #include "xen-ops.h" +#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) + static void __init m2p_override_init(void); +unsigned long *xen_p2m_addr __read_mostly; +EXPORT_SYMBOL_GPL(xen_p2m_addr); +unsigned long xen_p2m_size __read_mostly; +EXPORT_SYMBOL_GPL(xen_p2m_size); unsigned long xen_max_p2m_pfn __read_mostly; +EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); + +static DEFINE_SPINLOCK(p2m_update_lock); static unsigned long *p2m_mid_missing_mfn; static unsigned long *p2m_top_mfn; static unsigned long **p2m_top_mfn_p; - -/* Placeholders for holes in the address space */ -static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); - -static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); -static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); - -RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); - -/* For each I/O range remapped we may lose up to two leaf pages for the boundary - * violations and three mid pages to cover up to 3GB. With - * early_can_reuse_p2m_middle() most of the leaf pages will be reused by the - * remapped region. - */ -RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES); +static unsigned long *p2m_missing; +static unsigned long *p2m_identity; +static pte_t *p2m_missing_pte; +static pte_t *p2m_identity_pte; static inline unsigned p2m_top_index(unsigned long pfn) { @@ -220,14 +119,6 @@ static inline unsigned p2m_index(unsigned long pfn) return pfn % P2M_PER_PAGE; } -static void p2m_top_init(unsigned long ***top) -{ - unsigned i; - - for (i = 0; i < P2M_TOP_PER_PAGE; i++) - top[i] = p2m_mid_missing; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; @@ -244,28 +135,43 @@ static void p2m_top_mfn_p_init(unsigned long **top) top[i] = p2m_mid_missing_mfn; } -static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) +static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) { unsigned i; for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = leaf; + mid[i] = virt_to_mfn(leaf); } -static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) +static void p2m_init(unsigned long *p2m) { unsigned i; - for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = virt_to_mfn(leaf); + for (i = 0; i < P2M_PER_PAGE; i++) + p2m[i] = INVALID_P2M_ENTRY; } -static void p2m_init(unsigned long *p2m) +static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) { unsigned i; - for (i = 0; i < P2M_MID_PER_PAGE; i++) - p2m[i] = INVALID_P2M_ENTRY; + for (i = 0; i < P2M_PER_PAGE; i++) + p2m[i] = IDENTITY_FRAME(pfn + i); +} + +static void * __ref alloc_p2m_page(void) +{ + if (unlikely(!slab_is_available())) + return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + + return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); +} + +/* Only to be called in case of a race for a page just allocated! */ +static void free_p2m_page(void *p) +{ + BUG_ON(!slab_is_available()); + free_page((unsigned long)p); } /* @@ -280,40 +186,46 @@ static void p2m_init(unsigned long *p2m) */ void __ref xen_build_mfn_list_list(void) { - unsigned long pfn; + unsigned long pfn, mfn; + pte_t *ptep; + unsigned int level, topidx, mididx; + unsigned long *mid_mfn_p; if (xen_feature(XENFEAT_auto_translated_physmap)) return; /* Pre-initialize p2m_top_mfn to be completely missing */ if (p2m_top_mfn == NULL) { - p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_mid_missing_mfn = alloc_p2m_page(); p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); - p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_top_mfn_p = alloc_p2m_page(); p2m_top_mfn_p_init(p2m_top_mfn_p); - p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + p2m_top_mfn = alloc_p2m_page(); p2m_top_mfn_init(p2m_top_mfn); } else { /* Reinitialise, mfn's all change after migration */ p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); } - for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned long **mid; - unsigned long *mid_mfn_p; + for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; + pfn += P2M_PER_PAGE) { + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); - mid = p2m_top[topidx]; mid_mfn_p = p2m_top_mfn_p[topidx]; + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), + &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); + mfn = pte_mfn(*ptep); + ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); /* Don't bother allocating any mfn mid levels if * they're just missing, just update the stored mfn, * since all could have changed over a migrate. */ - if (mid == p2m_mid_missing) { + if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { BUG_ON(mididx); BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); @@ -322,19 +234,14 @@ void __ref xen_build_mfn_list_list(void) } if (mid_mfn_p == p2m_mid_missing_mfn) { - /* - * XXX boot-time only! We should never find - * missing parts of the mfn tree after - * runtime. - */ - mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); + mid_mfn_p = alloc_p2m_page(); p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_top_mfn_p[topidx] = mid_mfn_p; } p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); + mid_mfn_p[mididx] = mfn; } } @@ -353,171 +260,235 @@ void xen_setup_mfn_list_list(void) /* Set up p2m_top to point to the domain-builder provided p2m pages */ void __init xen_build_dynamic_phys_to_machine(void) { - unsigned long *mfn_list; - unsigned long max_pfn; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) return; - mfn_list = (unsigned long *)xen_start_info->mfn_list; - max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); - xen_max_p2m_pfn = max_pfn; + xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; + xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); - p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_missing); - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_identity); + for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) + xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; - p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_missing, p2m_missing); - p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_identity, p2m_identity); + xen_max_p2m_pfn = xen_p2m_size; +} - p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_top_init(p2m_top); +#define P2M_TYPE_IDENTITY 0 +#define P2M_TYPE_MISSING 1 +#define P2M_TYPE_PFN 2 +#define P2M_TYPE_UNKNOWN 3 - /* - * The domain builder gives us a pre-constructed p2m array in - * mfn_list for all the pages initially given to us, so we just - * need to graft that into our tree structure. - */ - for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); +static int xen_p2m_elem_type(unsigned long pfn) +{ + unsigned long mfn; - if (p2m_top[topidx] == p2m_mid_missing) { - unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid, p2m_missing); + if (pfn >= xen_p2m_size) + return P2M_TYPE_IDENTITY; - p2m_top[topidx] = mid; - } + mfn = xen_p2m_addr[pfn]; - /* - * As long as the mfn_list has enough entries to completely - * fill a p2m page, pointing into the array is ok. But if - * not the entries beyond the last pfn will be undefined. - */ - if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { - unsigned long p2midx; + if (mfn == INVALID_P2M_ENTRY) + return P2M_TYPE_MISSING; - p2midx = max_pfn % P2M_PER_PAGE; - for ( ; p2midx < P2M_PER_PAGE; p2midx++) - mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; - } - p2m_top[topidx][mididx] = &mfn_list[pfn]; - } + if (mfn & IDENTITY_FRAME_BIT) + return P2M_TYPE_IDENTITY; - m2p_override_init(); + return P2M_TYPE_PFN; } -#ifdef CONFIG_X86_64 -unsigned long __init xen_revector_p2m_tree(void) + +static void __init xen_rebuild_p2m_list(unsigned long *p2m) { - unsigned long va_start; - unsigned long va_end; + unsigned int i, chunk; unsigned long pfn; - unsigned long pfn_free = 0; - unsigned long *mfn_list = NULL; - unsigned long size; - - va_start = xen_start_info->mfn_list; - /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), - * so make sure it is rounded up to that */ - size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - va_end = va_start + size; - - /* If we were revectored already, don't do it again. */ - if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) - return 0; + unsigned long *mfns; + pte_t *ptep; + pmd_t *pmdp; + int type; - mfn_list = alloc_bootmem_align(size, PAGE_SIZE); - if (!mfn_list) { - pr_warn("Could not allocate space for a new P2M tree!\n"); - return xen_start_info->mfn_list; - } - /* Fill it out with INVALID_P2M_ENTRY value */ - memset(mfn_list, 0xFF, size); + p2m_missing = alloc_p2m_page(); + p2m_init(p2m_missing); + p2m_identity = alloc_p2m_page(); + p2m_init(p2m_identity); - for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx; - unsigned long *mid_p; + p2m_missing_pte = alloc_p2m_page(); + paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); + p2m_identity_pte = alloc_p2m_page(); + paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte(p2m_missing_pte + i, + pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); + set_pte(p2m_identity_pte + i, + pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); + } - if (!p2m_top[topidx]) + for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { + /* + * Try to map missing/identity PMDs or p2m-pages if possible. + * We have to respect the structure of the mfn_list_list + * which will be built just afterwards. + * Chunk size to test is one p2m page if we are in the middle + * of a mfn_list_list mid page and the complete mid page area + * if we are at index 0 of the mid page. Please note that a + * mid page might cover more than one PMD, e.g. on 32 bit PAE + * kernels. + */ + chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? + P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; + + type = xen_p2m_elem_type(pfn); + i = 0; + if (type != P2M_TYPE_PFN) + for (i = 1; i < chunk; i++) + if (xen_p2m_elem_type(pfn + i) != type) + break; + if (i < chunk) + /* Reset to minimal chunk size. */ + chunk = P2M_PER_PAGE; + + if (type == P2M_TYPE_PFN || i < chunk) { + /* Use initial p2m page contents. */ +#ifdef CONFIG_X86_64 + mfns = alloc_p2m_page(); + copy_page(mfns, xen_p2m_addr + pfn); +#else + mfns = xen_p2m_addr + pfn; +#endif + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); continue; + } - if (p2m_top[topidx] == p2m_mid_missing) + if (chunk == P2M_PER_PAGE) { + /* Map complete missing or identity p2m-page. */ + mfns = (type == P2M_TYPE_MISSING) ? + p2m_missing : p2m_identity; + ptep = populate_extra_pte((unsigned long)(p2m + pfn)); + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); continue; + } - mididx = p2m_mid_index(pfn); - mid_p = p2m_top[topidx][mididx]; - if (!mid_p) - continue; - if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) - continue; + /* Complete missing or identity PMD(s) can be mapped. */ + ptep = (type == P2M_TYPE_MISSING) ? + p2m_missing_pte : p2m_identity_pte; + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + pmdp = populate_extra_pmd( + (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); + set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); + } + } +} - if ((unsigned long)mid_p == INVALID_P2M_ENTRY) - continue; +void __init xen_vmalloc_p2m_tree(void) +{ + static struct vm_struct vm; - /* The old va. Rebase it on mfn_list */ - if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { - unsigned long *new; + vm.flags = VM_ALLOC; + vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, + PMD_SIZE * PMDS_PER_MID_PAGE); + vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); + pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); - if (pfn_free > (size / sizeof(unsigned long))) { - WARN(1, "Only allocated for %ld pages, but we want %ld!\n", - size / sizeof(unsigned long), pfn_free); - return 0; - } - new = &mfn_list[pfn_free]; + xen_max_p2m_pfn = vm.size / sizeof(unsigned long); - copy_page(new, mid_p); - p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + xen_rebuild_p2m_list(vm.addr); - pfn_free += P2M_PER_PAGE; + xen_p2m_addr = vm.addr; + xen_p2m_size = xen_max_p2m_pfn; - } - /* This should be the leafs allocated for identity from _brk. */ - } - return (unsigned long)mfn_list; + xen_inv_extra_mem(); + m2p_override_init(); } -#else -unsigned long __init xen_revector_p2m_tree(void) -{ - return 0; -} -#endif + unsigned long get_phys_to_machine(unsigned long pfn) { - unsigned topidx, mididx, idx; + pte_t *ptep; + unsigned int level; + + if (unlikely(pfn >= xen_p2m_size)) { + if (pfn < xen_max_p2m_pfn) + return xen_chk_extra_mem(pfn); - if (unlikely(pfn >= MAX_P2M_PFN)) return IDENTITY_FRAME(pfn); + } - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); /* * The INVALID_P2M_ENTRY is filled in both p2m_*identity * and in p2m_*missing, so returning the INVALID_P2M_ENTRY * would be wrong. */ - if (p2m_top[topidx][mididx] == p2m_identity) + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) return IDENTITY_FRAME(pfn); - return p2m_top[topidx][mididx][idx]; + return xen_p2m_addr[pfn]; } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void *alloc_p2m_page(void) +/* + * Allocate new pmd(s). It is checked whether the old pmd is still in place. + * If not, nothing is changed. This is okay as the only reason for allocating + * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual + * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! + */ +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) { - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); -} + pte_t *ptechk; + pte_t *pteret = ptep; + pte_t *pte_newpg[PMDS_PER_MID_PAGE]; + pmd_t *pmdp; + unsigned int level; + unsigned long flags; + unsigned long vaddr; + int i; -static void free_p2m_page(void *p) -{ - free_page((unsigned long)p); + /* Do all allocations first to bail out in error case. */ + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + pte_newpg[i] = alloc_p2m_page(); + if (!pte_newpg[i]) { + for (i--; i >= 0; i--) + free_p2m_page(pte_newpg[i]); + + return NULL; + } + } + + vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); + + for (i = 0; i < PMDS_PER_MID_PAGE; i++) { + copy_page(pte_newpg[i], pte_pg); + paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); + + pmdp = lookup_pmd_address(vaddr); + BUG_ON(!pmdp); + + spin_lock_irqsave(&p2m_update_lock, flags); + + ptechk = lookup_address(vaddr, &level); + if (ptechk == pte_pg) { + set_pmd(pmdp, + __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); + if (vaddr == (addr & ~(PMD_SIZE - 1))) + pteret = pte_offset_kernel(pmdp, addr); + pte_newpg[i] = NULL; + } + + spin_unlock_irqrestore(&p2m_update_lock, flags); + + if (pte_newpg[i]) { + paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); + free_p2m_page(pte_newpg[i]); + } + + vaddr += PMD_SIZE; + } + + return pteret; } /* @@ -530,58 +501,62 @@ static void free_p2m_page(void *p) static bool alloc_p2m(unsigned long pfn) { unsigned topidx, mididx; - unsigned long ***top_p, **mid; unsigned long *top_mfn_p, *mid_mfn; - unsigned long *p2m_orig; + pte_t *ptep, *pte_pg; + unsigned int level; + unsigned long flags; + unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); + unsigned long p2m_pfn; topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); - top_p = &p2m_top[topidx]; - mid = ACCESS_ONCE(*top_p); + ptep = lookup_address(addr, &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); + pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); - if (mid == p2m_mid_missing) { - /* Mid level is missing, allocate a new one */ - mid = alloc_p2m_page(); - if (!mid) + if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { + /* PMD level is missing, allocate a new one */ + ptep = alloc_p2m_pmd(addr, ptep, pte_pg); + if (!ptep) return false; - - p2m_mid_init(mid, p2m_missing); - - if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) - free_p2m_page(mid); } - top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + if (p2m_top_mfn) { + top_mfn_p = &p2m_top_mfn[topidx]; + mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); - BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); + BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); - if (mid_mfn == p2m_mid_missing_mfn) { - /* Separately check the mid mfn level */ - unsigned long missing_mfn; - unsigned long mid_mfn_mfn; - unsigned long old_mfn; + if (mid_mfn == p2m_mid_missing_mfn) { + /* Separately check the mid mfn level */ + unsigned long missing_mfn; + unsigned long mid_mfn_mfn; + unsigned long old_mfn; - mid_mfn = alloc_p2m_page(); - if (!mid_mfn) - return false; + mid_mfn = alloc_p2m_page(); + if (!mid_mfn) + return false; - p2m_mid_mfn_init(mid_mfn, p2m_missing); + p2m_mid_mfn_init(mid_mfn, p2m_missing); - missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); - mid_mfn_mfn = virt_to_mfn(mid_mfn); - old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); - if (old_mfn != missing_mfn) { - free_p2m_page(mid_mfn); - mid_mfn = mfn_to_virt(old_mfn); - } else { - p2m_top_mfn_p[topidx] = mid_mfn; + missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); + mid_mfn_mfn = virt_to_mfn(mid_mfn); + old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); + if (old_mfn != missing_mfn) { + free_p2m_page(mid_mfn); + mid_mfn = mfn_to_virt(old_mfn); + } else { + p2m_top_mfn_p[topidx] = mid_mfn; + } } + } else { + mid_mfn = NULL; } - p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]); - if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) { + p2m_pfn = pte_pfn(ACCESS_ONCE(*ptep)); + if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || + p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { /* p2m leaf page is missing */ unsigned long *p2m; @@ -589,183 +564,36 @@ static bool alloc_p2m(unsigned long pfn) if (!p2m) return false; - p2m_init(p2m); - - if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) - free_p2m_page(p2m); + if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) + p2m_init(p2m); else - mid_mfn[mididx] = virt_to_mfn(p2m); - } - - return true; -} - -static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) -{ - unsigned topidx, mididx, idx; - unsigned long *p2m; - - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* Pfff.. No boundary cross-over, lets get out. */ - if (!idx && check_boundary) - return false; - - WARN(p2m_top[topidx][mididx] == p2m_identity, - "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", - topidx, mididx); - - /* - * Could be done by xen_build_dynamic_phys_to_machine.. - */ - if (p2m_top[topidx][mididx] != p2m_missing) - return false; - - /* Boundary cross-over for the edges: */ - p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_init(p2m); + p2m_init_identity(p2m, pfn); - p2m_top[topidx][mididx] = p2m; + spin_lock_irqsave(&p2m_update_lock, flags); - return true; -} - -static bool __init early_alloc_p2m_middle(unsigned long pfn) -{ - unsigned topidx = p2m_top_index(pfn); - unsigned long **mid; - - mid = p2m_top[topidx]; - if (mid == p2m_mid_missing) { - mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_mid_init(mid, p2m_missing); - - p2m_top[topidx] = mid; - } - return true; -} - -/* - * Skim over the P2M tree looking at pages that are either filled with - * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and - * replace the P2M leaf with a p2m_missing or p2m_identity. - * Stick the old page in the new P2M tree location. - */ -static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn) -{ - unsigned topidx; - unsigned mididx; - unsigned ident_pfns; - unsigned inv_pfns; - unsigned long *p2m; - unsigned idx; - unsigned long pfn; - - /* We only look when this entails a P2M middle layer */ - if (p2m_index(set_pfn)) - return false; - - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { - topidx = p2m_top_index(pfn); - - if (!p2m_top[topidx]) - continue; - - if (p2m_top[topidx] == p2m_mid_missing) - continue; - - mididx = p2m_mid_index(pfn); - p2m = p2m_top[topidx][mididx]; - if (!p2m) - continue; - - if ((p2m == p2m_missing) || (p2m == p2m_identity)) - continue; - - if ((unsigned long)p2m == INVALID_P2M_ENTRY) - continue; - - ident_pfns = 0; - inv_pfns = 0; - for (idx = 0; idx < P2M_PER_PAGE; idx++) { - /* IDENTITY_PFNs are 1:1 */ - if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) - ident_pfns++; - else if (p2m[idx] == INVALID_P2M_ENTRY) - inv_pfns++; - else - break; + if (pte_pfn(*ptep) == p2m_pfn) { + set_pte(ptep, + pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); + if (mid_mfn) + mid_mfn[mididx] = virt_to_mfn(p2m); + p2m = NULL; } - if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) - goto found; - } - return false; -found: - /* Found one, replace old with p2m_identity or p2m_missing */ - p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); - - /* Reset where we want to stick the old page in. */ - topidx = p2m_top_index(set_pfn); - mididx = p2m_mid_index(set_pfn); - - /* This shouldn't happen */ - if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) - early_alloc_p2m_middle(set_pfn); - - if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) - return false; - - p2m_init(p2m); - p2m_top[topidx][mididx] = p2m; - return true; -} -bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!early_alloc_p2m_middle(pfn)) - return false; - - if (early_can_reuse_p2m_middle(pfn)) - return __set_phys_to_machine(pfn, mfn); - - if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) - return false; + spin_unlock_irqrestore(&p2m_update_lock, flags); - if (!__set_phys_to_machine(pfn, mfn)) - return false; + if (p2m) + free_p2m_page(p2m); } return true; } -static void __init early_split_p2m(unsigned long pfn) -{ - unsigned long mididx, idx; - - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* - * Allocate new middle and leaf pages if this pfn lies in the - * middle of one. - */ - if (mididx || idx) - early_alloc_p2m_middle(pfn); - if (idx) - early_alloc_p2m(pfn, false); -} - unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { unsigned long pfn; - if (unlikely(pfn_s >= MAX_P2M_PFN)) + if (unlikely(pfn_s >= xen_p2m_size)) return 0; if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) @@ -774,101 +602,51 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, if (pfn_s > pfn_e) return 0; - if (pfn_e > MAX_P2M_PFN) - pfn_e = MAX_P2M_PFN; - - early_split_p2m(pfn_s); - early_split_p2m(pfn_e); - - for (pfn = pfn_s; pfn < pfn_e;) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - - if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) - break; - pfn++; - - /* - * If the PFN was set to a middle or leaf identity - * page the remainder must also be identity, so skip - * ahead to the next middle or leaf entry. - */ - if (p2m_top[topidx] == p2m_mid_identity) - pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); - else if (p2m_top[topidx][mididx] == p2m_identity) - pfn = ALIGN(pfn, P2M_PER_PAGE); - } + if (pfn_e > xen_p2m_size) + pfn_e = xen_p2m_size; - WARN((pfn - pfn_s) != (pfn_e - pfn_s), - "Identity mapping failed. We are %ld short of 1-1 mappings!\n", - (pfn_e - pfn_s) - (pfn - pfn_s)); + for (pfn = pfn_s; pfn < pfn_e; pfn++) + xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); return pfn - pfn_s; } -/* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - unsigned topidx, mididx, idx; + pte_t *ptep; + unsigned int level; /* don't track P2M changes in autotranslate guests */ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) return true; - if (unlikely(pfn >= MAX_P2M_PFN)) { + if (unlikely(pfn >= xen_p2m_size)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; } - topidx = p2m_top_index(pfn); - mididx = p2m_mid_index(pfn); - idx = p2m_index(pfn); - - /* For sparse holes were the p2m leaf has real PFN along with - * PCI holes, stick in the PFN as the MFN value. - * - * set_phys_range_identity() will have allocated new middle - * and leaf pages as required so an existing p2m_mid_missing - * or p2m_missing mean that whole range will be identity so - * these can be switched to p2m_mid_identity or p2m_identity. - */ - if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { - if (p2m_top[topidx] == p2m_mid_identity) - return true; - - if (p2m_top[topidx] == p2m_mid_missing) { - WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, - p2m_mid_identity) != p2m_mid_missing); - return true; - } - - if (p2m_top[topidx][mididx] == p2m_identity) - return true; + if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) + return true; - /* Swap over from MISSING to IDENTITY if needed. */ - if (p2m_top[topidx][mididx] == p2m_missing) { - WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, - p2m_identity) != p2m_missing); - return true; - } - } + ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); + BUG_ON(!ptep || level != PG_LEVEL_4K); - if (p2m_top[topidx][mididx] == p2m_missing) + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) return mfn == INVALID_P2M_ENTRY; - p2m_top[topidx][mididx][idx] = mfn; + if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) + return mfn == IDENTITY_FRAME(pfn); - return true; + return false; } bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!alloc_p2m(pfn)) return false; - if (!__set_phys_to_machine(pfn, mfn)) - return false; + return __set_phys_to_machine(pfn, mfn); } return true; @@ -877,15 +655,16 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) #define M2P_OVERRIDE_HASH_SHIFT 10 #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) -static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); +static struct list_head *m2p_overrides; static DEFINE_SPINLOCK(m2p_override_lock); static void __init m2p_override_init(void) { unsigned i; - m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, - sizeof(unsigned long)); + m2p_overrides = alloc_bootmem_align( + sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, + sizeof(unsigned long)); for (i = 0; i < M2P_OVERRIDE_HASH; i++) INIT_LIST_HEAD(&m2p_overrides[i]); @@ -896,68 +675,9 @@ static unsigned long mfn_hash(unsigned long mfn) return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); } -int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) -{ - int i, ret = 0; - bool lazy = false; - pte_t *pte; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - - if (kmap_ops && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { - arch_enter_lazy_mmu_mode(); - lazy = true; - } - - for (i = 0; i < count; i++) { - unsigned long mfn, pfn; - - /* Do not add to override if the map failed. */ - if (map_ops[i].status) - continue; - - if (map_ops[i].flags & GNTMAP_contains_pte) { - pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + - (map_ops[i].host_addr & ~PAGE_MASK)); - mfn = pte_mfn(*pte); - } else { - mfn = PFN_DOWN(map_ops[i].dev_bus_addr); - } - pfn = page_to_pfn(pages[i]); - - WARN_ON(PagePrivate(pages[i])); - SetPagePrivate(pages[i]); - set_page_private(pages[i], mfn); - pages[i]->index = pfn_to_mfn(pfn); - - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; - } - - if (kmap_ops) { - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); - if (ret) - goto out; - } - } - -out: - if (lazy) - arch_leave_lazy_mmu_mode(); - - return ret; -} -EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); - /* Add an MFN override for a particular page */ -int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op) +static int m2p_add_override(unsigned long mfn, struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long pfn; @@ -970,7 +690,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, address = (unsigned long)__va(pfn << PAGE_SHIFT); ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_add_override: pfn %lx not mapped", pfn)) + "m2p_add_override: pfn %lx not mapped", pfn)) return -EINVAL; } @@ -1004,19 +724,19 @@ int m2p_add_override(unsigned long mfn, struct page *page, * because mfn_to_pfn (that ends up being called by GUPF) will * return the backend pfn rather than the frontend pfn. */ pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) == mfn) + if (__pfn_to_mfn(pfn) == mfn) set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); return 0; } -EXPORT_SYMBOL_GPL(m2p_add_override); -int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) +int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret = 0; bool lazy = false; + pte_t *pte; if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; @@ -1029,35 +749,75 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, } for (i = 0; i < count; i++) { - unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); - unsigned long pfn = page_to_pfn(pages[i]); + unsigned long mfn, pfn; - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { - ret = -EINVAL; - goto out; + /* Do not add to override if the map failed. */ + if (map_ops[i].status) + continue; + + if (map_ops[i].flags & GNTMAP_contains_pte) { + pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + + (map_ops[i].host_addr & ~PAGE_MASK)); + mfn = pte_mfn(*pte); + } else { + mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } + pfn = page_to_pfn(pages[i]); - set_page_private(pages[i], INVALID_P2M_ENTRY); - WARN_ON(!PagePrivate(pages[i])); - ClearPagePrivate(pages[i]); - set_phys_to_machine(pfn, pages[i]->index); + WARN_ON(PagePrivate(pages[i])); + SetPagePrivate(pages[i]); + set_page_private(pages[i], mfn); + pages[i]->index = pfn_to_mfn(pfn); - if (kmap_ops) - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); - if (ret) + if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { + ret = -ENOMEM; goto out; + } + + if (kmap_ops) { + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); + if (ret) + goto out; + } } out: if (lazy) arch_leave_lazy_mmu_mode(); + return ret; } -EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); +EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); -int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, - unsigned long mfn) +static struct page *m2p_find_override(unsigned long mfn) +{ + unsigned long flags; + struct list_head *bucket; + struct page *p, *ret; + + if (unlikely(!m2p_overrides)) + return NULL; + + ret = NULL; + bucket = &m2p_overrides[mfn_hash(mfn)]; + + spin_lock_irqsave(&m2p_override_lock, flags); + + list_for_each_entry(p, bucket, lru) { + if (page_private(p) == mfn) { + ret = p; + break; + } + } + + spin_unlock_irqrestore(&m2p_override_lock, flags); + + return ret; +} + +static int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op, + unsigned long mfn) { unsigned long flags; unsigned long pfn; @@ -1072,7 +832,7 @@ int m2p_remove_override(struct page *page, ptep = lookup_address(address, &level); if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_remove_override: pfn %lx not mapped", pfn)) + "m2p_remove_override: pfn %lx not mapped", pfn)) return -EINVAL; } @@ -1102,9 +862,8 @@ int m2p_remove_override(struct page *page, * hypercall actually returned an error. */ if (kmap_op->handle == GNTST_general_error) { - printk(KERN_WARNING "m2p_remove_override: " - "pfn %lx mfn %lx, failed to modify kernel mappings", - pfn, mfn); + pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", + pfn, mfn); put_balloon_scratch_page(); return -1; } @@ -1112,14 +871,14 @@ int m2p_remove_override(struct page *page, xen_mc_batch(); mcs = __xen_mc_entry( - sizeof(struct gnttab_unmap_and_replace)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; unmap_op->new_addr = scratch_page_address; unmap_op->handle = kmap_op->handle; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_and_replace, unmap_op, 1); + GNTTABOP_unmap_and_replace, unmap_op, 1); mcs = __xen_mc_entry(0); MULTI_update_va_mapping(mcs.mc, scratch_page_address, @@ -1145,35 +904,56 @@ int m2p_remove_override(struct page *page, * pfn again. */ mfn &= ~FOREIGN_FRAME_BIT; pfn = mfn_to_pfn_no_overrides(mfn); - if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && m2p_find_override(mfn) == NULL) set_phys_to_machine(pfn, mfn); return 0; } -EXPORT_SYMBOL_GPL(m2p_remove_override); -struct page *m2p_find_override(unsigned long mfn) +int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { - unsigned long flags; - struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; + int i, ret = 0; + bool lazy = false; - ret = NULL; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; - spin_lock_irqsave(&m2p_override_lock, flags); + if (kmap_ops && + !in_interrupt() && + paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } - list_for_each_entry(p, bucket, lru) { - if (page_private(p) == mfn) { - ret = p; - break; + for (i = 0; i < count; i++) { + unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); + unsigned long pfn = page_to_pfn(pages[i]); + + if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + ret = -EINVAL; + goto out; } - } - spin_unlock_irqrestore(&m2p_override_lock, flags); + set_page_private(pages[i], INVALID_P2M_ENTRY); + WARN_ON(!PagePrivate(pages[i])); + ClearPagePrivate(pages[i]); + set_phys_to_machine(pfn, pages[i]->index); + + if (kmap_ops) + ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); + if (ret) + goto out; + } +out: + if (lazy) + arch_leave_lazy_mmu_mode(); return ret; } +EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) { @@ -1192,79 +972,29 @@ EXPORT_SYMBOL_GPL(m2p_find_override_pfn); #include "debugfs.h" static int p2m_dump_show(struct seq_file *m, void *v) { - static const char * const level_name[] = { "top", "middle", - "entry", "abnormal", "error"}; -#define TYPE_IDENTITY 0 -#define TYPE_MISSING 1 -#define TYPE_PFN 2 -#define TYPE_UNKNOWN 3 static const char * const type_name[] = { - [TYPE_IDENTITY] = "identity", - [TYPE_MISSING] = "missing", - [TYPE_PFN] = "pfn", - [TYPE_UNKNOWN] = "abnormal"}; - unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; - unsigned int uninitialized_var(prev_level); - unsigned int uninitialized_var(prev_type); - - if (!p2m_top) - return 0; - - for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { - unsigned topidx = p2m_top_index(pfn); - unsigned mididx = p2m_mid_index(pfn); - unsigned idx = p2m_index(pfn); - unsigned lvl, type; - - lvl = 4; - type = TYPE_UNKNOWN; - if (p2m_top[topidx] == p2m_mid_missing) { - lvl = 0; type = TYPE_MISSING; - } else if (p2m_top[topidx] == NULL) { - lvl = 0; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == NULL) { - lvl = 1; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx] == p2m_identity) { - lvl = 1; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx] == p2m_missing) { - lvl = 1; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == 0) { - lvl = 2; type = TYPE_UNKNOWN; - } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { - lvl = 2; type = TYPE_IDENTITY; - } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { - lvl = 2; type = TYPE_MISSING; - } else if (p2m_top[topidx][mididx][idx] == pfn) { - lvl = 2; type = TYPE_PFN; - } else if (p2m_top[topidx][mididx][idx] != pfn) { - lvl = 2; type = TYPE_PFN; - } - if (pfn == 0) { - prev_level = lvl; - prev_type = type; - } - if (pfn == MAX_DOMAIN_PAGES-1) { - lvl = 3; - type = TYPE_UNKNOWN; - } - if (prev_type != type) { - seq_printf(m, " [0x%lx->0x%lx] %s\n", - prev_pfn_type, pfn, type_name[prev_type]); - prev_pfn_type = pfn; + [P2M_TYPE_IDENTITY] = "identity", + [P2M_TYPE_MISSING] = "missing", + [P2M_TYPE_PFN] = "pfn", + [P2M_TYPE_UNKNOWN] = "abnormal"}; + unsigned long pfn, first_pfn; + int type, prev_type; + + prev_type = xen_p2m_elem_type(0); + first_pfn = 0; + + for (pfn = 0; pfn < xen_p2m_size; pfn++) { + type = xen_p2m_elem_type(pfn); + if (type != prev_type) { + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, + type_name[prev_type]); prev_type = type; - } - if (prev_level != lvl) { - seq_printf(m, " [0x%lx->0x%lx] level %s\n", - prev_pfn_level, pfn, level_name[prev_level]); - prev_pfn_level = pfn; - prev_level = lvl; + first_pfn = pfn; } } + seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, + type_name[prev_type]); return 0; -#undef TYPE_IDENTITY -#undef TYPE_MISSING -#undef TYPE_PFN -#undef TYPE_UNKNOWN } static int p2m_dump_open(struct inode *inode, struct file *filp) |