diff options
-rw-r--r-- | mm/backing-dev.c | 19 | ||||
-rw-r--r-- | mm/vmstat.c | 6 |
2 files changed, 20 insertions, 5 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2b49dd2bb6f5..17f54030263d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -847,8 +847,9 @@ EXPORT_SYMBOL(congestion_wait); * jiffies for either a BDI to exit congestion of the given @sync queue * or a write to complete. * - * In the absence of zone congestion, cond_resched() is called to yield - * the processor if necessary but otherwise does not sleep. + * In the absence of zone congestion, a short sleep or a cond_resched() is + * performed to yield the processor and to allow other subsystems to make + * forward progress. * * The return value is 0 if the sleep is for the full timeout. Otherwise, * it is the number of jiffies that were still remaining when the function @@ -868,7 +869,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) */ if (atomic_read(&nr_bdi_congested[sync]) == 0 || !zone_is_reclaim_congested(zone)) { - cond_resched(); + + /* + * Memory allocation/reclaim might be called from a WQ + * context and the current implementation of the WQ + * concurrency control doesn't recognize that a particular + * WQ is congested if the worker thread is looping without + * ever sleeping. Therefore we have to do a short sleep + * here rather than calling cond_resched().
+ */ + if (current->flags & PF_WQ_WORKER) + schedule_timeout(1); + else + cond_resched(); /* In case we scheduled, work out time remaining */ ret = timeout - (jiffies - start); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8fd603b1665e..14d8cb491ef7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1139,13 +1139,14 @@ static const struct file_operations proc_vmstat_file_operations = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SMP +static struct workqueue_struct *vmstat_wq; static DEFINE_PER_CPU(struct delayed_work, vmstat_work); int sysctl_stat_interval __read_mostly = HZ; static void vmstat_update(struct work_struct *w) { refresh_cpu_vm_stats(smp_processor_id()); - schedule_delayed_work(&__get_cpu_var(vmstat_work), + queue_delayed_work(vmstat_wq, &__get_cpu_var(vmstat_work), round_jiffies_relative(sysctl_stat_interval)); } @@ -1154,7 +1155,7 @@ static void __cpuinit start_cpu_timer(int cpu) struct delayed_work *work = &per_cpu(vmstat_work, cpu); INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update); - schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu)); + queue_delayed_work_on(cpu, vmstat_wq, work, __round_jiffies_relative(HZ, cpu)); } /* @@ -1204,6 +1205,7 @@ static int __init setup_vmstat(void) register_cpu_notifier(&vmstat_notifier); + vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); for_each_online_cpu(cpu) start_cpu_timer(cpu); #endif |