summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@linux.intel.com>2018-10-27 01:07:52 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-27 02:26:34 +0300
commit966cf44f637e6aeea7e3d01ba004bf8b5beac78f (patch)
tree41b6c1bacf9f3151bc231691541823d05e4465e8 /mm/page_alloc.c
parentd483da5bc78b86fe4200d2947f193a745f711713 (diff)
downloadlinux-966cf44f637e6aeea7e3d01ba004bf8b5beac78f.tar.xz
mm: defer ZONE_DEVICE page initialization to the point where we init pgmap
The ZONE_DEVICE pages were being initialized in two locations. One was with the memory_hotplug lock held and another was outside of that lock. The problem with this is that it was nearly doubling the memory initialization time. Instead of doing this twice, once while holding a global lock and once without, I am opting to defer the initialization to the one outside of the lock. This allows us to avoid serializing the overhead for memory init and we can instead focus on per-node init times. One issue I encountered is that devm_memremap_pages and hmm_devmmem_pages_create were initializing only the pgmap field the same way. One wasn't initializing hmm_data, and the other was initializing it to a poison value. Since this is something that is exposed to the driver in the case of hmm I am opting for a third option and just initializing hmm_data to 0 since this is going to be exposed to unknown third party drivers. [alexander.h.duyck@linux.intel.com: fix reference count for pgmap in devm_memremap_pages] Link: http://lkml.kernel.org/r/20181008233404.1909.37302.stgit@localhost.localdomain Link: http://lkml.kernel.org/r/20180925202053.3576.66039.stgit@localhost.localdomain Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com> Tested-by: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Michal Hocko <mhocko@suse.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c92
1 files changed, 89 insertions, 3 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cee1abf85d72..d73ff2188d72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5465,12 +5465,23 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (highest_memmap_pfn < end_pfn - 1)
highest_memmap_pfn = end_pfn - 1;
+#ifdef CONFIG_ZONE_DEVICE
/*
* Honor reservation requested by the driver for this ZONE_DEVICE
- * memory
+ * memory. We limit the total number of pages to initialize to just
+ * those that might contain the memory mapping. We will defer the
+ * ZONE_DEVICE page initialization until after we have released
+ * the hotplug lock.
*/
- if (altmap && start_pfn == altmap->base_pfn)
- start_pfn += altmap->reserve;
+ if (zone == ZONE_DEVICE) {
+ if (!altmap)
+ return;
+
+ if (start_pfn == altmap->base_pfn)
+ start_pfn += altmap->reserve;
+ end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+ }
+#endif
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
@@ -5537,6 +5548,81 @@ not_early:
}
}
+#ifdef CONFIG_ZONE_DEVICE
+void __ref memmap_init_zone_device(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long size,
+ struct dev_pagemap *pgmap)
+{
+ unsigned long pfn, end_pfn = start_pfn + size;
+ struct pglist_data *pgdat = zone->zone_pgdat;
+ unsigned long zone_idx = zone_idx(zone);
+ unsigned long start = jiffies;
+ int nid = pgdat->node_id;
+
+ if (WARN_ON_ONCE(!pgmap || !is_dev_zone(zone)))
+ return;
+
+ /*
+ * The call to memmap_init_zone should have already taken care
+ * of the pages reserved for the memmap, so we can just jump to
+ * the end of that region and start processing the device pages.
+ */
+ if (pgmap->altmap_valid) {
+ struct vmem_altmap *altmap = &pgmap->altmap;
+
+ start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+ size = end_pfn - start_pfn;
+ }
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ __init_single_page(page, pfn, zone_idx, nid);
+
+ /*
+ * Mark page reserved as it will need to wait for onlining
+ * phase for it to be fully associated with a zone.
+ *
+ * We can use the non-atomic __set_bit operation for setting
+ * the flag as we are still initializing the pages.
+ */
+ __SetPageReserved(page);
+
+ /*
+ * ZONE_DEVICE pages union ->lru with a ->pgmap back
+ * pointer and hmm_data. It is a bug if a ZONE_DEVICE
+ * page is ever freed or placed on a driver-private list.
+ */
+ page->pgmap = pgmap;
+ page->hmm_data = 0;
+
+ /*
+ * Mark the block movable so that blocks are reserved for
+ * movable at startup. This will force kernel allocations
+ * to reserve their blocks rather than leaking throughout
+ * the address space during boot when many long-lived
+ * kernel allocations are made.
+ *
+ * bitmap is created for zone's valid pfn range. but memmap
+ * can be created for invalid pages (for alignment)
+ * check here not to call set_pageblock_migratetype() against
+ * pfn out of zone.
+ *
+ * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+ * because this is done early in sparse_add_one_section
+ */
+ if (!(pfn & (pageblock_nr_pages - 1))) {
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ cond_resched();
+ }
+ }
+
+ pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
+ size, jiffies_to_msecs(jiffies - start));
+}
+
+#endif
static void __meminit zone_init_free_lists(struct zone *zone)
{
unsigned int order, t;