summaryrefslogtreecommitdiff
path: root/mm/compaction.c
diff options
context:
space:
mode:
authorTony Lindgren <tony@atomide.com>2016-03-30 20:36:06 +0300
committerTony Lindgren <tony@atomide.com>2016-03-30 20:36:06 +0300
commit1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch)
tree76c5b35c2b04eafce86a1a729c02ab705eba44bc /mm/compaction.c
parentebf24414809200915b9ddf7f109bba7c278c8210 (diff)
parent3ca4a238106dedc285193ee47f494a6584b6fd2f (diff)
downloadlinux-1809de7e7d37c585e01a1bcc583ea92b78fc759d.tar.xz
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc Fix a longstanding bug in the hwmod code that could cause hardware SYSCONFIG register values to not match the kernel's idea of what they should be, and that could result in lower performance during IP block idle entry. Basic build, boot, and PM test logs are available here: http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'mm/compaction.c')
-rw-r--r--mm/compaction.c325
1 files changed, 262 insertions, 63 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 585de54dbe8c..ccf97b02b85f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -7,6 +7,7 @@
*
* Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
*/
+#include <linux/cpu.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/compaction.h>
@@ -17,6 +18,8 @@
#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
@@ -71,49 +74,6 @@ static inline bool migrate_async_suitable(int migratetype)
return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
}
-/*
- * Check that the whole (or subset of) a pageblock given by the interval of
- * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner. The scanners then need to
- * use only pfn_valid_within() check for arches that allow holes within
- * pageblocks.
- *
- * Return struct page pointer of start_pfn, or NULL if checks were not passed.
- *
- * It's possible on some configurations to have a setup like node0 node1 node0
- * i.e. it's possible that all pages within a zones range of pages do not
- * belong to a single zone. We assume that a border between node0 and node1
- * can occur within a single pageblock, but not a node0 node1 node0
- * interleaving within a single pageblock. It is therefore sufficient to check
- * the first and last page of a pageblock and avoid checking each individual
- * page in a pageblock.
- */
-static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
- unsigned long end_pfn, struct zone *zone)
-{
- struct page *start_page;
- struct page *end_page;
-
- /* end_pfn is one past the range we are checking */
- end_pfn--;
-
- if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
- return NULL;
-
- start_page = pfn_to_page(start_pfn);
-
- if (page_zone(start_page) != zone)
- return NULL;
-
- end_page = pfn_to_page(end_pfn);
-
- /* This gives a shorter code than deriving page_zone(end_page) */
- if (page_zone_id(start_page) != page_zone_id(end_page))
- return NULL;
-
- return start_page;
-}
-
#ifdef CONFIG_COMPACTION
/* Do not skip compaction more than 64 times */
@@ -200,7 +160,8 @@ static void reset_cached_positions(struct zone *zone)
{
zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
- zone->compact_cached_free_pfn = zone_end_pfn(zone);
+ zone->compact_cached_free_pfn =
+ round_down(zone_end_pfn(zone) - 1, pageblock_nr_pages);
}
/*
@@ -554,13 +515,17 @@ unsigned long
isolate_freepages_range(struct compact_control *cc,
unsigned long start_pfn, unsigned long end_pfn)
{
- unsigned long isolated, pfn, block_end_pfn;
+ unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
LIST_HEAD(freelist);
pfn = start_pfn;
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < cc->zone->zone_start_pfn)
+ block_start_pfn = cc->zone->zone_start_pfn;
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
for (; pfn < end_pfn; pfn += isolated,
+ block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
/* Protect pfn from changing by isolate_freepages_block */
unsigned long isolate_start_pfn = pfn;
@@ -573,11 +538,13 @@ isolate_freepages_range(struct compact_control *cc,
* scanning range to right one.
*/
if (pfn >= block_end_pfn) {
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
}
- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
+ if (!pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, cc->zone))
break;
isolated = isolate_freepages_block(cc, &isolate_start_pfn,
@@ -863,18 +830,23 @@ unsigned long
isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
unsigned long end_pfn)
{
- unsigned long pfn, block_end_pfn;
+ unsigned long pfn, block_start_pfn, block_end_pfn;
/* Scan block by block. First and last block may be incomplete */
pfn = start_pfn;
+ block_start_pfn = pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < cc->zone->zone_start_pfn)
+ block_start_pfn = cc->zone->zone_start_pfn;
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
for (; pfn < end_pfn; pfn = block_end_pfn,
+ block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
block_end_pfn = min(block_end_pfn, end_pfn);
- if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
+ if (!pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, cc->zone))
continue;
pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
@@ -1103,7 +1075,9 @@ int sysctl_compact_unevictable_allowed __read_mostly = 1;
static isolate_migrate_t isolate_migratepages(struct zone *zone,
struct compact_control *cc)
{
- unsigned long low_pfn, end_pfn;
+ unsigned long block_start_pfn;
+ unsigned long block_end_pfn;
+ unsigned long low_pfn;
unsigned long isolate_start_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
@@ -1115,16 +1089,21 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
* initialized by compact_zone()
*/
low_pfn = cc->migrate_pfn;
+ block_start_pfn = cc->migrate_pfn & ~(pageblock_nr_pages - 1);
+ if (block_start_pfn < zone->zone_start_pfn)
+ block_start_pfn = zone->zone_start_pfn;
/* Only scan within a pageblock boundary */
- end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
+ block_end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
/*
* Iterate over whole pageblocks until we find the first suitable.
* Do not cross the free scanner.
*/
- for (; end_pfn <= cc->free_pfn;
- low_pfn = end_pfn, end_pfn += pageblock_nr_pages) {
+ for (; block_end_pfn <= cc->free_pfn;
+ low_pfn = block_end_pfn,
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
/*
* This can potentially iterate a massively long zone with
@@ -1135,7 +1114,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
&& compact_should_abort(cc))
break;
- page = pageblock_pfn_to_page(low_pfn, end_pfn, zone);
+ page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
+ zone);
if (!page)
continue;
@@ -1154,8 +1134,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
/* Perform the isolation */
isolate_start_pfn = low_pfn;
- low_pfn = isolate_migratepages_block(cc, low_pfn, end_pfn,
- isolate_mode);
+ low_pfn = isolate_migratepages_block(cc, low_pfn,
+ block_end_pfn, isolate_mode);
if (!low_pfn || cc->contended) {
acct_isolated(zone, cc);
@@ -1211,11 +1191,11 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
/*
* Mark that the PG_migrate_skip information should be cleared
- * by kswapd when it goes to sleep. kswapd does not set the
+ * by kswapd when it goes to sleep. kcompactd does not set the
* flag itself as the decision to be clear should be directly
* based on an allocation request.
*/
- if (!current_is_kswapd())
+ if (cc->direct_compaction)
zone->compact_blockskip_flush = true;
return COMPACT_COMPLETE;
@@ -1358,10 +1338,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
/*
* Clear pageblock skip if there were failures recently and compaction
- * is about to be retried after being deferred. kswapd does not do
- * this reset as it'll reset the cached information when going to sleep.
+ * is about to be retried after being deferred.
*/
- if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
+ if (compaction_restarting(zone, cc->order))
__reset_isolation_suitable(zone);
/*
@@ -1371,11 +1350,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
*/
cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
- if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
- cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
+ if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
+ cc->free_pfn = round_down(end_pfn - 1, pageblock_nr_pages);
zone->compact_cached_free_pfn = cc->free_pfn;
}
- if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
+ if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
cc->migrate_pfn = start_pfn;
zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
@@ -1497,6 +1476,7 @@ static unsigned long compact_zone_order(struct zone *zone, int order,
.mode = mode,
.alloc_flags = alloc_flags,
.classzone_idx = classzone_idx,
+ .direct_compaction = true,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -1759,4 +1739,223 @@ void compaction_unregister_node(struct node *node)
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */
+static inline bool kcompactd_work_requested(pg_data_t *pgdat)
+{
+ return pgdat->kcompactd_max_order > 0;
+}
+
+static bool kcompactd_node_suitable(pg_data_t *pgdat)
+{
+ int zoneid;
+ struct zone *zone;
+ enum zone_type classzone_idx = pgdat->kcompactd_classzone_idx;
+
+ for (zoneid = 0; zoneid < classzone_idx; zoneid++) {
+ zone = &pgdat->node_zones[zoneid];
+
+ if (!populated_zone(zone))
+ continue;
+
+ if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
+ classzone_idx) == COMPACT_CONTINUE)
+ return true;
+ }
+
+ return false;
+}
+
+static void kcompactd_do_work(pg_data_t *pgdat)
+{
+ /*
+ * With no special task, compact all zones so that a page of requested
+ * order is allocatable.
+ */
+ int zoneid;
+ struct zone *zone;
+ struct compact_control cc = {
+ .order = pgdat->kcompactd_max_order,
+ .classzone_idx = pgdat->kcompactd_classzone_idx,
+ .mode = MIGRATE_SYNC_LIGHT,
+ .ignore_skip_hint = true,
+
+ };
+ bool success = false;
+
+ trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
+ cc.classzone_idx);
+ count_vm_event(KCOMPACTD_WAKE);
+
+ for (zoneid = 0; zoneid < cc.classzone_idx; zoneid++) {
+ int status;
+
+ zone = &pgdat->node_zones[zoneid];
+ if (!populated_zone(zone))
+ continue;
+
+ if (compaction_deferred(zone, cc.order))
+ continue;
+
+ if (compaction_suitable(zone, cc.order, 0, zoneid) !=
+ COMPACT_CONTINUE)
+ continue;
+
+ cc.nr_freepages = 0;
+ cc.nr_migratepages = 0;
+ cc.zone = zone;
+ INIT_LIST_HEAD(&cc.freepages);
+ INIT_LIST_HEAD(&cc.migratepages);
+
+ status = compact_zone(zone, &cc);
+
+ if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
+ cc.classzone_idx, 0)) {
+ success = true;
+ compaction_defer_reset(zone, cc.order, false);
+ } else if (status == COMPACT_COMPLETE) {
+ /*
+ * We use sync migration mode here, so we defer like
+ * sync direct compaction does.
+ */
+ defer_compaction(zone, cc.order);
+ }
+
+ VM_BUG_ON(!list_empty(&cc.freepages));
+ VM_BUG_ON(!list_empty(&cc.migratepages));
+ }
+
+ /*
+ * Regardless of success, we are done until woken up next. But remember
+ * the requested order/classzone_idx in case it was higher/tighter than
+ * our current ones
+ */
+ if (pgdat->kcompactd_max_order <= cc.order)
+ pgdat->kcompactd_max_order = 0;
+ if (pgdat->kcompactd_classzone_idx >= cc.classzone_idx)
+ pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+}
+
+void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
+{
+ if (!order)
+ return;
+
+ if (pgdat->kcompactd_max_order < order)
+ pgdat->kcompactd_max_order = order;
+
+ if (pgdat->kcompactd_classzone_idx > classzone_idx)
+ pgdat->kcompactd_classzone_idx = classzone_idx;
+
+ if (!waitqueue_active(&pgdat->kcompactd_wait))
+ return;
+
+ if (!kcompactd_node_suitable(pgdat))
+ return;
+
+ trace_mm_compaction_wakeup_kcompactd(pgdat->node_id, order,
+ classzone_idx);
+ wake_up_interruptible(&pgdat->kcompactd_wait);
+}
+
+/*
+ * The background compaction daemon, started as a kernel thread
+ * from the init process.
+ */
+static int kcompactd(void *p)
+{
+ pg_data_t *pgdat = (pg_data_t*)p;
+ struct task_struct *tsk = current;
+
+ const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+
+ if (!cpumask_empty(cpumask))
+ set_cpus_allowed_ptr(tsk, cpumask);
+
+ set_freezable();
+
+ pgdat->kcompactd_max_order = 0;
+ pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
+
+ while (!kthread_should_stop()) {
+ trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
+ wait_event_freezable(pgdat->kcompactd_wait,
+ kcompactd_work_requested(pgdat));
+
+ kcompactd_do_work(pgdat);
+ }
+
+ return 0;
+}
+
+/*
+ * This kcompactd start function will be called by init and node-hot-add.
+ * On node-hot-add, kcompactd will moved to proper cpus if cpus are hot-added.
+ */
+int kcompactd_run(int nid)
+{
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int ret = 0;
+
+ if (pgdat->kcompactd)
+ return 0;
+
+ pgdat->kcompactd = kthread_run(kcompactd, pgdat, "kcompactd%d", nid);
+ if (IS_ERR(pgdat->kcompactd)) {
+ pr_err("Failed to start kcompactd on node %d\n", nid);
+ ret = PTR_ERR(pgdat->kcompactd);
+ pgdat->kcompactd = NULL;
+ }
+ return ret;
+}
+
+/*
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold mem_hotplug_begin/end().
+ */
+void kcompactd_stop(int nid)
+{
+ struct task_struct *kcompactd = NODE_DATA(nid)->kcompactd;
+
+ if (kcompactd) {
+ kthread_stop(kcompactd);
+ NODE_DATA(nid)->kcompactd = NULL;
+ }
+}
+
+/*
+ * It's optimal to keep kcompactd on the same CPUs as their memory, but
+ * not required for correctness. So if the last cpu in a node goes
+ * away, we get changed to run anywhere: as the first one comes back,
+ * restore their cpu bindings.
+ */
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
+{
+ int nid;
+
+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
+ for_each_node_state(nid, N_MEMORY) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ const struct cpumask *mask;
+
+ mask = cpumask_of_node(pgdat->node_id);
+
+ if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
+ /* One of our CPUs online: restore mask */
+ set_cpus_allowed_ptr(pgdat->kcompactd, mask);
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static int __init kcompactd_init(void)
+{
+ int nid;
+
+ for_each_node_state(nid, N_MEMORY)
+ kcompactd_run(nid);
+ hotcpu_notifier(cpu_callback, 0);
+ return 0;
+}
+subsys_initcall(kcompactd_init)
+
#endif /* CONFIG_COMPACTION */