summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c101
1 files changed, 76 insertions, 25 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6e65d024995..cb474cc99645 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -932,6 +932,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
long mapped_ratio;
long distress;
long swap_tendency;
+ long imbalance;
if (zone_is_near_oom(zone))
goto force_reclaim_mapped;
@@ -967,6 +968,46 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
/*
+ * If there's huge imbalance between active and inactive
+ * (think active 100 times larger than inactive) we should
+ * become more permissive, or the system will take too much
+ * cpu before it start swapping during memory pressure.
+ * Distress is about avoiding early-oom, this is about
+ * making swappiness graceful despite setting it to low
+ * values.
+ *
+ * Avoid div by zero with nr_inactive+1, and max resulting
+ * value is vm_total_pages.
+ */
+ imbalance = zone_page_state(zone, NR_ACTIVE);
+ imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+
+ /*
+ * Reduce the effect of imbalance if swappiness is low,
+ * this means for a swappiness very low, the imbalance
+ * must be much higher than 100 for this logic to make
+ * the difference.
+ *
+ * Max temporary value is vm_total_pages*100.
+ */
+ imbalance *= (vm_swappiness + 1);
+ imbalance /= 100;
+
+ /*
+ * If not much of the ram is mapped, makes the imbalance
+ * less relevant, it's high priority we refill the inactive
+ * list with mapped pages only in presence of high ratio of
+ * mapped pages.
+ *
+ * Max temporary value is vm_total_pages*100.
+ */
+ imbalance *= mapped_ratio;
+ imbalance /= 100;
+
+ /* apply imbalance feedback to swap_tendency */
+ swap_tendency += imbalance;
+
+ /*
* Now use this metric to decide whether to start moving mapped
* memory onto the inactive list.
*/
@@ -1067,8 +1108,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
unsigned long nr_to_scan;
unsigned long nr_reclaimed = 0;
- atomic_inc(&zone->reclaim_in_progress);
-
/*
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
@@ -1107,8 +1146,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
}
throttle_vm_writeout(sc->gfp_mask);
-
- atomic_dec(&zone->reclaim_in_progress);
return nr_reclaimed;
}
@@ -1146,7 +1183,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
note_zone_scanning_priority(zone, priority);
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
sc->all_unreclaimable = 0;
@@ -1245,7 +1282,7 @@ out:
*/
if (priority < 0)
priority = 0;
- for (i = 0; zones[i] != 0; i++) {
+ for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1327,7 +1364,8 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) &&
+ priority != DEF_PRIORITY)
continue;
if (!zone_watermark_ok(zone, order, zone->pages_high,
@@ -1362,7 +1400,8 @@ loop_again:
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) &&
+ priority != DEF_PRIORITY)
continue;
if (!zone_watermark_ok(zone, order, zone->pages_high,
@@ -1371,18 +1410,25 @@ loop_again:
temp_priority[i] = priority;
sc.nr_scanned = 0;
note_zone_scanning_priority(zone, priority);
- nr_reclaimed += shrink_zone(priority, zone, &sc);
+ /*
+ * We put equal pressure on every zone, unless one
+ * zone has way too many pages free already.
+ */
+ if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+ end_zone, 0))
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
lru_pages);
nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
- if (zone->all_unreclaimable)
+ if (zone_is_all_unreclaimable(zone))
continue;
if (nr_slab == 0 && zone->pages_scanned >=
(zone_page_state(zone, NR_ACTIVE)
+ zone_page_state(zone, NR_INACTIVE)) * 6)
- zone->all_unreclaimable = 1;
+ zone_set_flag(zone,
+ ZONE_ALL_UNRECLAIMABLE);
/*
* If we've done a decent amount of scanning and
* the reclaim ratio is low, start doing writepage
@@ -1548,7 +1594,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+ if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
continue;
/* For pass = 0 we don't shrink the active list */
@@ -1688,9 +1734,11 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
{
pg_data_t *pgdat;
cpumask_t mask;
+ int nid;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
- for_each_online_pgdat(pgdat) {
+ for_each_node_state(nid, N_HIGH_MEMORY) {
+ pgdat = NODE_DATA(nid);
mask = node_to_cpumask(pgdat->node_id);
if (any_online_cpu(mask) != NR_CPUS)
/* One of our CPUs online: restore mask */
@@ -1727,7 +1775,7 @@ static int __init kswapd_init(void)
int nid;
swap_setup();
- for_each_online_node(nid)
+ for_each_node_state(nid, N_HIGH_MEMORY)
kswapd_run(nid);
hotcpu_notifier(cpu_callback, 0);
return 0;
@@ -1847,8 +1895,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
- cpumask_t mask;
int node_id;
+ int ret;
/*
* Zone reclaim reclaims unmapped file backed pages and
@@ -1866,15 +1914,13 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
<= zone->min_slab_pages)
return 0;
+ if (zone_is_all_unreclaimable(zone))
+ return 0;
+
/*
- * Avoid concurrent zone reclaims, do not reclaim in a zone that does
- * not have reclaimable pages and if we should not delay the allocation
- * then do not scan.
+ * Do not scan if the allocation should not be delayed.
*/
- if (!(gfp_mask & __GFP_WAIT) ||
- zone->all_unreclaimable ||
- atomic_read(&zone->reclaim_in_progress) > 0 ||
- (current->flags & PF_MEMALLOC))
+ if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
return 0;
/*
@@ -1884,9 +1930,14 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* as wide as possible.
*/
node_id = zone_to_nid(zone);
- mask = node_to_cpumask(node_id);
- if (!cpus_empty(mask) && node_id != numa_node_id())
+ if (node_state(node_id, N_CPU) && node_id != numa_node_id())
+ return 0;
+
+ if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
return 0;
- return __zone_reclaim(zone, gfp_mask, order);
+ ret = __zone_reclaim(zone, gfp_mask, order);
+ zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+
+ return ret;
}
#endif