From 53b87cf088e2ea68d7c59619d0214cc15bb76133 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 7 Sep 2012 18:23:51 +0100 Subject: x86, mm: Include the entire kernel memory map in trampoline_pgd There are various pieces of code in arch/x86 that require a page table with an identity mapping. Make trampoline_pgd a proper kernel page table, it currently only includes the kernel text and module space mapping. One new feature of trampoline_pgd is that it now has mappings for the physical I/O device addresses, which are inserted at ioremap() time. Some broken implementations of EFI firmware require these mappings to always be around. Acked-by: Jan Beulich Signed-off-by: Matt Fleming --- arch/x86/mm/init_64.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2b6b4a3c8beb..fd4404f19d39 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -108,13 +108,13 @@ void sync_global_pgds(unsigned long start, unsigned long end) for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); struct page *page; + pgd_t *pgd; if (pgd_none(*pgd_ref)) continue; spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; spinlock_t *pgt_lock; pgd = (pgd_t *)page_address(page) + pgd_index(address); @@ -130,6 +130,13 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(pgt_lock); } + + pgd = __va(real_mode_header->trampoline_pgd); + pgd += pgd_index(address); + + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + spin_unlock(&pgd_lock); } } -- cgit v1.2.3 From 4b0ef1fe8a626f0ba7f649764f979d0dc9eab86b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 12 Dec 2012 13:51:46 -0800 Subject: page_alloc: use N_MEMORY instead N_HIGH_MEMORY change the node_states initialization N_HIGH_MEMORY stands for the nodes that has normal or high memory. 
N_MEMORY stands for the nodes that have any memory. The code here needs to handle the nodes which have memory, so we should use N_MEMORY instead. Since we introduced N_MEMORY, we update the initialization of node_states. Signed-off-by: Lai Jiangshan Signed-off-by: Lin Feng Signed-off-by: Wen Congyang Cc: Christoph Lameter Cc: Hillf Danton Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 4 +++- mm/page_alloc.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3baff255adac..2ead3c8a4c84 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -630,7 +630,9 @@ void __init paging_init(void) * numa support is not compiled in, and later node_set_state * will not set it back. */ - node_clear_state(0, N_NORMAL_MEMORY); + node_clear_state(0, N_MEMORY); + if (N_MEMORY != N_NORMAL_MEMORY) + node_clear_state(0, N_NORMAL_MEMORY); zone_sizes_init(); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4171cd4f8257..35727168896b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1695,7 +1695,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, * * If the zonelist cache is present in the passed in zonelist, then * returns a pointer to the allowed node mask (either the current - * tasks mems_allowed, or node_states[N_HIGH_MEMORY].) + * tasks mems_allowed, or node_states[N_MEMORY].) * * If the zonelist cache is not available for this zonelist, does * nothing and returns NULL. @@ -1724,7 +1724,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? 
&cpuset_current_mems_allowed : - &node_states[N_HIGH_MEMORY]; + &node_states[N_MEMORY]; return allowednodes; } @@ -3238,7 +3238,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) return node; } - for_each_node_state(n, N_HIGH_MEMORY) { + for_each_node_state(n, N_MEMORY) { /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) @@ -3380,7 +3380,7 @@ static int default_zonelist_order(void) * local memory, NODE_ORDER may be suitable. */ average_size = total_size / - (nodes_weight(node_states[N_HIGH_MEMORY]) + 1); + (nodes_weight(node_states[N_MEMORY]) + 1); for_each_online_node(nid) { low_kmem_size = 0; total_size = 0; @@ -4731,7 +4731,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) /* * early_calculate_totalpages() * Sum pages in active regions for movable zone. - * Populate N_HIGH_MEMORY for calculating usable_nodes. + * Populate N_MEMORY for calculating usable_nodes. */ static unsigned long __init early_calculate_totalpages(void) { @@ -4744,7 +4744,7 @@ static unsigned long __init early_calculate_totalpages(void) totalpages += pages; if (pages) - node_set_state(nid, N_HIGH_MEMORY); + node_set_state(nid, N_MEMORY); } return totalpages; } @@ -4761,9 +4761,9 @@ static void __init find_zone_movable_pfns_for_nodes(void) unsigned long usable_startpfn; unsigned long kernelcore_node, kernelcore_remaining; /* save the state before borrow the nodemask */ - nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; + nodemask_t saved_node_state = node_states[N_MEMORY]; unsigned long totalpages = early_calculate_totalpages(); - int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); + int usable_nodes = nodes_weight(node_states[N_MEMORY]); /* * If movablecore was specified, calculate what size of @@ -4798,7 +4798,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) restart: /* Spread kernelcore memory as evenly as possible throughout nodes */ kernelcore_node = required_kernelcore / usable_nodes; 
- for_each_node_state(nid, N_HIGH_MEMORY) { + for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; /* @@ -4890,23 +4890,27 @@ restart: out: /* restore the node_state */ - node_states[N_HIGH_MEMORY] = saved_node_state; + node_states[N_MEMORY] = saved_node_state; } -/* Any regular memory on that node ? */ -static void __init check_for_regular_memory(pg_data_t *pgdat) +/* Any regular or high memory on that node ? */ +static void check_for_memory(pg_data_t *pgdat, int nid) { -#ifdef CONFIG_HIGHMEM enum zone_type zone_type; - for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { + if (N_MEMORY == N_NORMAL_MEMORY) + return; + + for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (zone->present_pages) { - node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); + node_set_state(nid, N_HIGH_MEMORY); + if (N_NORMAL_MEMORY != N_HIGH_MEMORY && + zone_type <= ZONE_NORMAL) + node_set_state(nid, N_NORMAL_MEMORY); break; } } -#endif } /** @@ -4989,8 +4993,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Any memory on that node */ if (pgdat->node_present_pages) - node_set_state(nid, N_HIGH_MEMORY); - check_for_regular_memory(pgdat); + node_set_state(nid, N_MEMORY); + check_for_memory(pgdat, nid); } } -- cgit v1.2.3 From be354f40812314dee2b1e3aa272528c056bb827d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 15 Dec 2012 12:29:54 -0800 Subject: Revert "x86, mm: Include the entire kernel memory map in trampoline_pgd" This reverts commit 53b87cf088e2ea68d7c59619d0214cc15bb76133. It causes odd bootup problems on x86-64. Markus Trippelsdorf gets a repeatable oops, and I see a non-repeatable oops (or constant stream of messages that scroll off too quickly to read) that seems to go away with this commit reverted. So we don't know exactly what is wrong with the commit, but it's definitely problematic, and worth reverting sooner rather than later. 
Bisected-by: Markus Trippelsdorf Cc: H Peter Anvin Cc: Jan Beulich Cc: Matt Fleming Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 9 +--- arch/x86/mm/ioremap.c | 105 ----------------------------------------------- arch/x86/realmode/init.c | 17 +------- 3 files changed, 3 insertions(+), 128 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 07519a120449..2ead3c8a4c84 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -108,13 +108,13 @@ void sync_global_pgds(unsigned long start, unsigned long end) for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); struct page *page; - pgd_t *pgd; if (pgd_none(*pgd_ref)) continue; spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; spinlock_t *pgt_lock; pgd = (pgd_t *)page_address(page) + pgd_index(address); @@ -130,13 +130,6 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(pgt_lock); } - - pgd = __va(real_mode_header->trampoline_pgd); - pgd += pgd_index(address); - - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - spin_unlock(&pgd_lock); } } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index e190f7b56653..78fe3f1ac49f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -50,107 +50,6 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -#ifdef CONFIG_X86_64 -static void ident_pte_range(unsigned long paddr, unsigned long vaddr, - pmd_t *ppmd, pmd_t *vpmd, unsigned long end) -{ - pte_t *ppte = pte_offset_kernel(ppmd, paddr); - pte_t *vpte = pte_offset_kernel(vpmd, vaddr); - - do { - set_pte(ppte, *vpte); - } while (ppte++, vpte++, vaddr += PAGE_SIZE, vaddr != end); -} - -static int ident_pmd_range(unsigned long paddr, unsigned long vaddr, - pud_t *ppud, pud_t *vpud, unsigned long end) -{ - pmd_t *ppmd = pmd_offset(ppud, paddr); - pmd_t *vpmd = pmd_offset(vpud, vaddr); - unsigned 
long next; - - do { - next = pmd_addr_end(vaddr, end); - - if (!pmd_present(*ppmd)) { - pte_t *ppte = (pte_t *)get_zeroed_page(GFP_KERNEL); - if (!ppte) - return 1; - - set_pmd(ppmd, __pmd(_KERNPG_TABLE | __pa(ppte))); - } - - ident_pte_range(paddr, vaddr, ppmd, vpmd, next); - } while (ppmd++, vpmd++, vaddr = next, vaddr != end); - - return 0; -} - -static int ident_pud_range(unsigned long paddr, unsigned long vaddr, - pgd_t *ppgd, pgd_t *vpgd, unsigned long end) -{ - pud_t *ppud = pud_offset(ppgd, paddr); - pud_t *vpud = pud_offset(vpgd, vaddr); - unsigned long next; - - do { - next = pud_addr_end(vaddr, end); - - if (!pud_present(*ppud)) { - pmd_t *ppmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); - if (!ppmd) - return 1; - - set_pud(ppud, __pud(_KERNPG_TABLE | __pa(ppmd))); - } - - if (ident_pmd_range(paddr, vaddr, ppud, vpud, next)) - return 1; - } while (ppud++, vpud++, vaddr = next, vaddr != end); - - return 0; -} - -static int insert_identity_mapping(resource_size_t paddr, unsigned long vaddr, - unsigned long size) -{ - unsigned long end = vaddr + size; - unsigned long next; - pgd_t *vpgd, *ppgd; - - /* Don't map over the guard hole. */ - if (paddr >= 0x800000000000 || paddr + size > 0x800000000000) - return 1; - - ppgd = __va(real_mode_header->trampoline_pgd) + pgd_index(paddr); - - vpgd = pgd_offset_k(vaddr); - do { - next = pgd_addr_end(vaddr, end); - - if (!pgd_present(*ppgd)) { - pud_t *ppud = (pud_t *)get_zeroed_page(GFP_KERNEL); - if (!ppud) - return 1; - - set_pgd(ppgd, __pgd(_KERNPG_TABLE | __pa(ppud))); - } - - if (ident_pud_range(paddr, vaddr, ppgd, vpgd, next)) - return 1; - } while (ppgd++, vpgd++, vaddr = next, vaddr != end); - - return 0; -} -#else -static inline int insert_identity_mapping(resource_size_t paddr, - unsigned long vaddr, - unsigned long size) -{ - return 0; -} -#endif /* CONFIG_X86_64 */ - /* * Remap an arbitrary physical address space into the kernel virtual * address space. 
Needed when the kernel wants to access high addresses @@ -264,10 +163,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, ret_addr = (void __iomem *) (vaddr + offset); mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); - if (insert_identity_mapping(phys_addr, vaddr, size)) - printk(KERN_WARNING "ioremap: unable to map 0x%llx in identity pagetable\n", - (unsigned long long)phys_addr); - /* * Check if the request spans more than any BAR in the iomem resource * tree. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 8e6ab6137852..cbca565af5bd 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -78,21 +78,8 @@ void __init setup_real_mode(void) *trampoline_cr4_features = read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - - /* - * Create an identity mapping for all of physical memory. - */ - for (i = 0; i <= pgd_index(max_pfn << PAGE_SHIFT); i++) { - int index = pgd_index(PAGE_OFFSET) + i; - - trampoline_pgd[i] = (u64)pgd_val(swapper_pg_dir[index]); - } - - /* - * Copy the upper-half of the kernel pages tables. - */ - for (i = pgd_index(PAGE_OFFSET); i < PTRS_PER_PGD; i++) - trampoline_pgd[i] = (u64)pgd_val(swapper_pg_dir[i]); + trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; + trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; #endif } -- cgit v1.2.3