From c1955ce32fdb0877b7a1b22feb2669358f65be76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Jun 2010 23:08:06 +0200 Subject: writeback: remove wb_list The wb_list member of struct backing_device_info always has exactly one element. Just use the direct bdi->wb pointer instead and simplify some code. Also remove bdi_task_init which is now trivial to prepare for the next patch. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e9aec0d099df..50f146146169 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -45,8 +45,6 @@ enum bdi_stat_item { #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) struct bdi_writeback { - struct list_head list; /* hangs off the bdi */ - struct backing_dev_info *bdi; /* our parent bdi */ unsigned int nr; @@ -80,8 +78,7 @@ struct backing_dev_info { unsigned int max_ratio, max_prop_frac; struct bdi_writeback wb; /* default writeback info for this bdi */ - spinlock_t wb_lock; /* protects update side of wb_list */ - struct list_head wb_list; /* the flusher threads hanging off this bdi */ + spinlock_t wb_lock; /* protects work_list */ struct list_head work_list; -- cgit v1.2.3 From 082439004b31adc146e96e5f1c574dd2b57dcd93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 19 Jun 2010 23:08:22 +0200 Subject: writeback: merge bdi_writeback_task and bdi_start_fn Move all code for the writeback thread into fs/fs-writeback.c instead of splitting it over two functions in two files. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 35 ++++++++++++++++++++++++++++++++++- include/linux/backing-dev.h | 2 +- mm/backing-dev.c | 44 +------------------------------------------- 3 files changed, 36 insertions(+), 45 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d67989b8ba44..c8471b3ddccf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -775,12 +775,36 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * Handle writeback of dirty data for the device backed by this bdi. Also * wakes up periodically and does kupdated style flushing. */ -int bdi_writeback_task(struct bdi_writeback *wb) +int bdi_writeback_thread(void *data) { + struct bdi_writeback *wb = data; + struct backing_dev_info *bdi = wb->bdi; unsigned long last_active = jiffies; unsigned long wait_jiffies = -1UL; long pages_written; + /* + * Add us to the active bdi_list + */ + spin_lock_bh(&bdi_lock); + list_add_rcu(&bdi->bdi_list, &bdi_list); + spin_unlock_bh(&bdi_lock); + + current->flags |= PF_FLUSHER | PF_SWAPWRITE; + set_freezable(); + + /* + * Our parent may run at a different priority, just set us to normal + */ + set_user_nice(current, 0); + + /* + * Clear pending bit and wakeup anybody waiting to tear us down + */ + clear_bit(BDI_pending, &bdi->state); + smp_mb__after_clear_bit(); + wake_up_bit(&bdi->state, BDI_pending); + while (!kthread_should_stop()) { pages_written = wb_do_writeback(wb, 0); @@ -813,9 +837,18 @@ int bdi_writeback_task(struct bdi_writeback *wb) try_to_freeze(); } + wb->task = NULL; + + /* + * Flush any work that raced with us exiting. No new work + * will be added, since this bdi isn't discoverable anymore. + */ + if (!list_empty(&bdi->work_list)) + wb_do_writeback(wb, 1); return 0; } + /* * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 50f146146169..e536f3a74e60 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -102,7 +102,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_task(struct bdi_writeback *wb); +int bdi_writeback_thread(void *data); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6c2a09c8922c..bceac647e4d1 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -260,48 +260,6 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_more_io); } -static int bdi_start_fn(void *ptr) -{ - struct bdi_writeback *wb = ptr; - struct backing_dev_info *bdi = wb->bdi; - int ret; - - /* - * Add us to the active bdi_list - */ - spin_lock_bh(&bdi_lock); - list_add_rcu(&bdi->bdi_list, &bdi_list); - spin_unlock_bh(&bdi_lock); - - current->flags |= PF_FLUSHER | PF_SWAPWRITE; - set_freezable(); - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - /* - * Clear pending bit and wakeup anybody waiting to tear us down - */ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); - - ret = bdi_writeback_task(wb); - - wb->task = NULL; - - /* - * Flush any work that raced with us exiting. No new work - * will be added, since this bdi isn't discoverable anymore. - */ - if (!list_empty(&bdi->work_list)) - wb_do_writeback(wb, 1); - - return ret; -} - int bdi_has_dirty_io(struct backing_dev_info *bdi) { return wb_has_dirty_io(&bdi->wb); @@ -425,7 +383,7 @@ static int bdi_forker_task(void *ptr) spin_unlock_bh(&bdi_lock); wb = &bdi->wb; - wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", + wb->task = kthread_run(bdi_writeback_thread, wb, "flush-%s", dev_name(bdi->dev)); /* * If task creation fails, then readd the bdi to -- cgit v1.2.3 From 6f904ff0e39ea88f81eb77e8dfb4e1238492f0a8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:11 +0300 Subject: writeback: harmonize writeback threads naming The write-back code mixes words "thread" and "task" for the same things. This is not a big deal, but still an inconsistency. hch: a convention I tend to use and I've seen in various places is to always use _task for the storage of the task_struct pointer, and thread everywhere else. This especially helps with having foo_thread for the actual thread and foo_task for a global variable keeping the task_struct pointer This patch renames: * 'bdi_add_default_flusher_task()' -> 'bdi_add_default_flusher_thread()' * 'bdi_forker_task()' -> 'bdi_forker_thread()' because bdi threads are 'bdi_writeback_thread()', so these names are more consistent. This patch also amends commentaries and makes them refer the forker and bdi threads as "thread", not "task". Also, while on it, make 'bdi_add_default_flusher_thread()' declaration use 'static void' instead of 'void static' and make checkpatch.pl happy. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 2 +- include/linux/backing-dev.h | 2 +- mm/backing-dev.c | 26 +++++++++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 261570deb22c..002be0ff2ab3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -840,7 +840,7 @@ int bdi_writeback_thread(void *data) /* * Longest period of inactivity that we tolerate. If we - * see dirty data again later, the task will get + * see dirty data again later, the thread will get * recreated automatically. */ max_idle = max(5UL * 60 * HZ, wait_jiffies); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e536f3a74e60..f0936f5f85dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -50,7 +50,7 @@ struct bdi_writeback { unsigned long last_old_flush; /* last old data flush */ - struct task_struct *task; /* writeback task */ + struct task_struct *task; /* writeback thread */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index ac78a3336181..4e9ed2a8521f 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -50,7 +50,7 @@ static struct timer_list sync_supers_timer; static int bdi_sync_supers(void *); static void sync_supers_timer_fn(unsigned long); -static void bdi_add_default_flusher_task(struct backing_dev_info *bdi); +static void bdi_add_default_flusher_thread(struct backing_dev_info *bdi); #ifdef CONFIG_DEBUG_FS #include @@ -279,10 +279,10 @@ static void bdi_flush_io(struct backing_dev_info *bdi) } /* - * kupdated() used to do this. We cannot do it from the bdi_forker_task() + * kupdated() used to do this. We cannot do it from the bdi_forker_thread() * or we risk deadlocking on ->s_umount. The longer term solution would be * to implement sync_supers_bdi() or similar and simply do it from the - * bdi writeback tasks individually. + * bdi writeback thread individually. */ static int bdi_sync_supers(void *unused) { @@ -318,7 +318,7 @@ static void sync_supers_timer_fn(unsigned long unused) bdi_arm_supers_timer(); } -static int bdi_forker_task(void *ptr) +static int bdi_forker_thread(void *ptr) { struct bdi_writeback *me = ptr; @@ -354,7 +354,7 @@ static int bdi_forker_task(void *ptr) !bdi_has_dirty_io(bdi)) continue; - bdi_add_default_flusher_task(bdi); + bdi_add_default_flusher_thread(bdi); } set_current_state(TASK_INTERRUPTIBLE); @@ -376,7 +376,7 @@ static int bdi_forker_task(void *ptr) /* * This is our real job - check for pending entries in - * bdi_pending_list, and create the tasks that got added + * bdi_pending_list, and create the threads that got added */ bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, bdi_list); @@ -387,7 +387,7 @@ static int bdi_forker_task(void *ptr) wb->task = kthread_run(bdi_writeback_thread, wb, "flush-%s", dev_name(bdi->dev)); /* - * If task creation fails, then readd the bdi to + * If thread creation fails, then readd the bdi to * the pending list and force writeout of the bdi * from this forker thread. That will free some memory * and we can try again. @@ -430,10 +430,10 @@ static void bdi_add_to_pending(struct rcu_head *head) } /* - * Add the default flusher task that gets created for any bdi + * Add the default flusher thread that gets created for any bdi * that has dirty data pending writeout */ -void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) +static void bdi_add_default_flusher_thread(struct backing_dev_info *bdi) { if (!bdi_cap_writeback_dirty(bdi)) return; @@ -445,10 +445,10 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) } /* - * Check with the helper whether to proceed adding a task. Will only + * Check with the helper whether to proceed adding a thread. Will only * abort if we two or more simultanous calls to - * bdi_add_default_flusher_task() occured, further additions will block - * waiting for previous additions to finish. + * bdi_add_default_flusher_thread() occured, further additions will + * block waiting for previous additions to finish. */ if (!test_and_set_bit(BDI_pending, &bdi->state)) { list_del_rcu(&bdi->bdi_list); @@ -506,7 +506,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, if (bdi_cap_flush_forker(bdi)) { struct bdi_writeback *wb = &bdi->wb; - wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s", + wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", dev_name(dev)); if (IS_ERR(wb->task)) { wb->task = NULL; -- cgit v1.2.3 From 080dcec41709be72613133f695be75b98dd43e88 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:16 +0300 Subject: writeback: simplify bdi code a little This patch simplifies bdi code a little by removing the 'pending_list' which is redundant. Indeed, currently the forker thread ('bdi_forker_thread()') is working like this: 1. In a loop, fetch all bdi's which have works but have no writeback thread and move them to the 'pending_list'. 2. If the list is empty, sleep for 5 sec. 3. Otherwise, take one bdi from the list, fork the writeback thread for this bdi, and repeat the loop. IOW, it first moves everything to the 'pending_list', then process only one element, and so on. This patch simplifies the algorithm, which is now as follows. 1. Find the first bdi which has a work and remove it from the global list of bdi's (bdi_list). 2. If there was not such bdi, sleep 5 sec. 3. Fork the writeback thread for this bdi and repeat the loop. IOW, now we find the first bdi to process, process it, and so on. This is simpler and involves less lists. The bonus now is that we can get rid of a couple of functions, as well as remove complications which involve 'rcu_call()' and 'bdi->rcu_head'. This patch also makes sure we use 'list_add_tail_rcu()', instead of plain 'list_add_tail()', but this piece of code is going to be removed in the next patch anyway. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 82 ++++++++++----------------------------------- 2 files changed, 18 insertions(+), 65 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f0936f5f85dd..95ecb2bebca8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -58,7 +58,6 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; - struct rcu_head rcu_head; unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 72e6eb96efe2..dbc66815a0fe 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -50,8 +50,6 @@ static struct timer_list sync_supers_timer; static int bdi_sync_supers(void *); static void sync_supers_timer_fn(unsigned long); -static void bdi_add_default_flusher_thread(struct backing_dev_info *bdi); - #ifdef CONFIG_DEBUG_FS #include #include @@ -331,6 +329,7 @@ static int bdi_forker_thread(void *ptr) set_user_nice(current, 0); for (;;) { + bool fork = false; struct task_struct *task; struct backing_dev_info *bdi, *tmp; @@ -349,23 +348,30 @@ static int bdi_forker_thread(void *ptr) * a thread registered. If so, set that up. */ list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) { + if (!bdi_cap_writeback_dirty(bdi)) + continue; if (bdi->wb.task) continue; if (list_empty(&bdi->work_list) && !bdi_has_dirty_io(bdi)) continue; - bdi_add_default_flusher_thread(bdi); + WARN(!test_bit(BDI_registered, &bdi->state), + "bdi %p/%s is not registered!\n", bdi, bdi->name); + + list_del_rcu(&bdi->bdi_list); + fork = true; + break; } + spin_unlock_bh(&bdi_lock); /* Keep working if default bdi still has things to do */ if (!list_empty(&me->bdi->work_list)) __set_current_state(TASK_RUNNING); - if (list_empty(&bdi_pending_list)) { + if (!fork) { unsigned long wait; - spin_unlock_bh(&bdi_lock); wait = msecs_to_jiffies(dirty_writeback_interval * 10); if (wait) schedule_timeout(wait); @@ -378,13 +384,13 @@ static int bdi_forker_thread(void *ptr) __set_current_state(TASK_RUNNING); /* - * This is our real job - check for pending entries in - * bdi_pending_list, and create the threads that got added + * Set the pending bit - if someone will try to unregister this + * bdi - it'll wait on this bit. */ - bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, - bdi_list); - list_del_init(&bdi->bdi_list); - spin_unlock_bh(&bdi_lock); + set_bit(BDI_pending, &bdi->state); + + /* Make sure no one uses the picked bdi */ + synchronize_rcu(); task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s", dev_name(bdi->dev)); @@ -397,7 +403,7 @@ static int bdi_forker_thread(void *ptr) * flush other bdi's to free memory. */ spin_lock_bh(&bdi_lock); - list_add_tail(&bdi->bdi_list, &bdi_pending_list); + list_add_tail_rcu(&bdi->bdi_list, &bdi_list); spin_unlock_bh(&bdi_lock); bdi_flush_io(bdi); @@ -408,57 +414,6 @@ static int bdi_forker_thread(void *ptr) return 0; } -static void bdi_add_to_pending(struct rcu_head *head) -{ - struct backing_dev_info *bdi; - - bdi = container_of(head, struct backing_dev_info, rcu_head); - INIT_LIST_HEAD(&bdi->bdi_list); - - spin_lock(&bdi_lock); - list_add_tail(&bdi->bdi_list, &bdi_pending_list); - spin_unlock(&bdi_lock); - - /* - * We are now on the pending list, wake up bdi_forker_task() - * to finish the job and add us back to the active bdi_list - */ - wake_up_process(default_backing_dev_info.wb.task); -} - -/* - * Add the default flusher thread that gets created for any bdi - * that has dirty data pending writeout - */ -static void bdi_add_default_flusher_thread(struct backing_dev_info *bdi) -{ - if (!bdi_cap_writeback_dirty(bdi)) - return; - - if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) { - printk(KERN_ERR "bdi %p/%s is not registered!\n", - bdi, bdi->name); - return; - } - - /* - * Check with the helper whether to proceed adding a thread. Will only - * abort if we two or more simultanous calls to - * bdi_add_default_flusher_thread() occured, further additions will - * block waiting for previous additions to finish. - */ - if (!test_and_set_bit(BDI_pending, &bdi->state)) { - list_del_rcu(&bdi->bdi_list); - - /* - * We must wait for the current RCU period to end before - * moving to the pending list. So schedule that operation - * from an RCU callback. - */ - call_rcu(&bdi->rcu_head, bdi_add_to_pending); - } -} - /* * Remove bdi from bdi_list, and ensure that it is no longer visible */ @@ -599,7 +554,6 @@ int bdi_init(struct backing_dev_info *bdi) bdi->max_ratio = 100; bdi->max_prop_frac = PROP_FRAC_BASE; spin_lock_init(&bdi->wb_lock); - INIT_RCU_HEAD(&bdi->rcu_head); INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->work_list); -- cgit v1.2.3 From ecd584030da67ede1bf17955746a6ce834d9fc6b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:18 +0300 Subject: writeback: move last_active to bdi Currently bdi threads use local variable 'last_active' which stores last time when the bdi thread did some useful work. Move this local variable to 'struct bdi_writeback'. This is just a preparation for the further patches which will make the forker thread decide when bdi threads should be killed. Signed-off-by: Artem Bityutskiy Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 6 +++--- include/linux/backing-dev.h | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 57fbfd0ebc52..9f5cab75c157 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -800,12 +800,12 @@ int bdi_writeback_thread(void *data) { struct bdi_writeback *wb = data; struct backing_dev_info *bdi = wb->bdi; - unsigned long last_active = jiffies; unsigned long wait_jiffies = -1UL; long pages_written; current->flags |= PF_FLUSHER | PF_SWAPWRITE; set_freezable(); + wb->last_active = jiffies; /* * Our parent may run at a different priority, just set us to normal @@ -827,7 +827,7 @@ int bdi_writeback_thread(void *data) trace_writeback_pages_written(pages_written); if (pages_written) - last_active = jiffies; + wb->last_active = jiffies; else if (wait_jiffies != -1UL) { unsigned long max_idle; @@ -837,7 +837,7 @@ int bdi_writeback_thread(void *data) * recreated automatically. */ max_idle = max(5UL * 60 * HZ, wait_jiffies); - if (time_after(jiffies, max_idle + last_active)) + if (time_after(jiffies, max_idle + wb->last_active)) break; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 95ecb2bebca8..71b6223e0a77 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -45,15 +45,16 @@ enum bdi_stat_item { #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) struct bdi_writeback { - struct backing_dev_info *bdi; /* our parent bdi */ + struct backing_dev_info *bdi; /* our parent bdi */ unsigned int nr; - unsigned long last_old_flush; /* last old data flush */ + unsigned long last_old_flush; /* last old data flush */ + unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct list_head b_dirty; /* dirty inodes */ - struct list_head b_io; /* parked for writeback */ - struct list_head b_more_io; /* parked for more writeback */ + struct task_struct *task; /* writeback thread */ + struct list_head b_dirty; /* dirty inodes */ + struct list_head b_io; /* parked for writeback */ + struct list_head b_more_io; /* parked for more writeback */ }; struct backing_dev_info { -- cgit v1.2.3 From 6467716a37673e8d47b4984eb19839bdad0a8353 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:22 +0300 Subject: writeback: optimize periodic bdi thread wakeups Whe the first inode for a bdi is marked dirty, we wake up the bdi thread which should take care of the periodic background write-out. However, the write-out will actually start only 'dirty_writeback_interval' centisecs later, so we can delay the wake-up. This change was requested by Nick Piggin who pointed out that if we delay the wake-up, we weed out 2 unnecessary contex switches, which matters because '__mark_inode_dirty()' is a hot-path function. This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which sets up a timer to wake-up the bdi thread and returns. So the wake-up is delayed. We also delete the timer in bdi threads just before writing-back. And synchronously delete it when unregistering bdi. At the unregister point the bdi does not have any users, so no one can arm it again. Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes this change as well. This patch also moves the 'bdi_wb_init()' function down in the file to avoid forward-declaration of 'bdi_wakeup_thread_delayed()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 36 +++++++--------------- include/linux/backing-dev.h | 2 ++ mm/backing-dev.c | 73 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 70 insertions(+), 41 deletions(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 55f6e46e06f1..bfa2df2c7ce2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -76,7 +76,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, { trace_writeback_queue(bdi, work); - spin_lock(&bdi->wb_lock); + spin_lock_bh(&bdi->wb_lock); list_add_tail(&work->list, &bdi->work_list); if (bdi->wb.task) { wake_up_process(bdi->wb.task); @@ -88,7 +88,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, trace_writeback_nothread(bdi, work); wake_up_process(default_backing_dev_info.wb.task); } - spin_unlock(&bdi->wb_lock); + spin_unlock_bh(&bdi->wb_lock); } static void @@ -704,13 +704,13 @@ get_next_work_item(struct backing_dev_info *bdi) { struct wb_writeback_work *work = NULL; - spin_lock(&bdi->wb_lock); + spin_lock_bh(&bdi->wb_lock); if (!list_empty(&bdi->work_list)) { work = list_entry(bdi->work_list.next, struct wb_writeback_work, list); list_del_init(&work->list); } - spin_unlock(&bdi->wb_lock); + spin_unlock_bh(&bdi->wb_lock); return work; } @@ -810,6 +810,12 @@ int bdi_writeback_thread(void *data) trace_writeback_thread_start(bdi); while (!kthread_should_stop()) { + /* + * Remove own delayed wake-up timer, since we are already awake + * and we'll take care of the preriodic write-back. + */ + del_timer(&wb->wakeup_timer); + pages_written = wb_do_writeback(wb, 0); trace_writeback_pages_written(pages_written); @@ -868,26 +874,6 @@ void wakeup_flusher_threads(long nr_pages) rcu_read_unlock(); } -/* - * This function is used when the first inode for this bdi is marked dirty. It - * wakes-up the corresponding bdi thread which should then take care of the - * periodic background write-out of dirty inodes. - */ -static void wakeup_bdi_thread(struct backing_dev_info *bdi) -{ - spin_lock(&bdi->wb_lock); - if (bdi->wb.task) - wake_up_process(bdi->wb.task); - else - /* - * When bdi tasks are inactive for long time, they are killed. - * In this case we have to wake-up the forker thread which - * should create and run the bdi thread. - */ - wake_up_process(default_backing_dev_info.wb.task); - spin_unlock(&bdi->wb_lock); -} - static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1019,7 +1005,7 @@ out: spin_unlock(&inode_lock); if (wakeup_bdi) - wakeup_bdi_thread(bdi); + bdi_wakeup_thread_delayed(bdi); } EXPORT_SYMBOL(__mark_inode_dirty); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 71b6223e0a77..7628219e5386 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -52,6 +52,7 @@ struct bdi_writeback { unsigned long last_active; /* last time bdi thread was active */ struct task_struct *task; /* writeback thread */ + struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -105,6 +106,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); int bdi_writeback_thread(void *data); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_arm_supers_timer(void); +void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a9a08d88a745..cfff7225138c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -248,17 +248,6 @@ static int __init default_bdi_init(void) } subsys_initcall(default_bdi_init); -static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) -{ - memset(wb, 0, sizeof(*wb)); - - wb->bdi = bdi; - wb->last_old_flush = jiffies; - INIT_LIST_HEAD(&wb->b_dirty); - INIT_LIST_HEAD(&wb->b_io); - INIT_LIST_HEAD(&wb->b_more_io); -} - int bdi_has_dirty_io(struct backing_dev_info *bdi) { return wb_has_dirty_io(&bdi->wb); @@ -316,6 +305,43 @@ static void sync_supers_timer_fn(unsigned long unused) bdi_arm_supers_timer(); } +static void wakeup_timer_fn(unsigned long data) +{ + struct backing_dev_info *bdi = (struct backing_dev_info *)data; + + spin_lock_bh(&bdi->wb_lock); + if (bdi->wb.task) { + wake_up_process(bdi->wb.task); + } else { + /* + * When bdi tasks are inactive for long time, they are killed. + * In this case we have to wake-up the forker thread which + * should create and run the bdi thread. + */ + wake_up_process(default_backing_dev_info.wb.task); + } + spin_unlock_bh(&bdi->wb_lock); +} + +/* + * This function is used when the first inode for this bdi is marked dirty. It + * wakes-up the corresponding bdi thread which should then take care of the + * periodic background write-out of dirty inodes. Since the write-out would + * starts only 'dirty_writeback_interval' centisecs from now anyway, we just + * set up a timer which wakes the bdi thread up later. + * + * Note, we wouldn't bother setting up the timer, but this function is on the + * fast-path (used by '__mark_inode_dirty()'), so we save few context switches + * by delaying the wake-up. + */ +void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) +{ + unsigned long timeout; + + timeout = msecs_to_jiffies(dirty_writeback_interval * 10); + mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); +} + /* * Calculate the longest interval (jiffies) bdi threads are allowed to be * inactive. @@ -353,8 +379,10 @@ static int bdi_forker_thread(void *ptr) * Temporary measure, we want to make sure we don't see * dirty data on the default backing_dev_info */ - if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) + if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { + del_timer(&me->wakeup_timer); wb_do_writeback(me, 0); + } spin_lock_bh(&bdi_lock); set_current_state(TASK_INTERRUPTIBLE); @@ -386,7 +414,7 @@ static int bdi_forker_thread(void *ptr) break; } - spin_lock(&bdi->wb_lock); + spin_lock_bh(&bdi->wb_lock); /* * If there is no work to do and the bdi thread was * inactive long enough - kill it. The wb_lock is taken @@ -403,7 +431,7 @@ static int bdi_forker_thread(void *ptr) action = KILL_THREAD; break; } - spin_unlock(&bdi->wb_lock); + spin_unlock_bh(&bdi->wb_lock); } spin_unlock_bh(&bdi_lock); @@ -427,9 +455,9 @@ static int bdi_forker_thread(void *ptr) * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. */ - spin_lock(&bdi->wb_lock); + spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; - spin_unlock(&bdi->wb_lock); + spin_unlock_bh(&bdi->wb_lock); } break; @@ -586,6 +614,7 @@ void bdi_unregister(struct backing_dev_info *bdi) if (bdi->dev) { trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); + del_timer_sync(&bdi->wb.wakeup_timer); if (!bdi_cap_flush_forker(bdi)) bdi_wb_shutdown(bdi); @@ -596,6 +625,18 @@ void bdi_unregister(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_unregister); +static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) +{ + memset(wb, 0, sizeof(*wb)); + + wb->bdi = bdi; + wb->last_old_flush = jiffies; + INIT_LIST_HEAD(&wb->b_dirty); + INIT_LIST_HEAD(&wb->b_io); + INIT_LIST_HEAD(&wb->b_more_io); + setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); +} + int bdi_init(struct backing_dev_info *bdi) { int i, err; -- cgit v1.2.3 From 81d73a32d775ae9674ea6edf0b5b721fc3bc57d9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Aug 2010 14:17:44 -0700 Subject: mm: fix writeback_in_progress() Commit 83ba7b071f3 ("writeback: simplify the write back thread queue") broke writeback_in_progress() as in that commit we started to remove work items from the list at the moment we start working on them and not at the moment they are finished. Thus if the flusher thread was doing some work but there was no other work queued, writeback_in_progress() returned false. This could in particular cause unnecessary queueing of background writeback from balance_dirty_pages() or writeout work from writeback_sb_if_idle(). This patch fixes the problem by introducing a bit in the bdi state which indicates that the flusher thread is processing some work and uses this bit for writeback_in_progress() test. NOTE: Both callsites of writeback_in_progress() (namely, writeback_inodes_sb_if_idle() and balance_dirty_pages()) would actually need a different information than what writeback_in_progress() provides. They would need to know whether *the kind of writeback they are going to submit* is already queued. But this information isn't that simple to provide so let's fix writeback_in_progress() for the time being. Signed-off-by: Jan Kara Cc: Christoph Hellwig Cc: Wu Fengguang Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 4 +++- include/linux/backing-dev.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/backing-dev.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8a5807d2fb9d..7d9d06ba184b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -68,7 +68,7 @@ int nr_pdflush_threads; */ int writeback_in_progress(struct backing_dev_info *bdi) { - return !list_empty(&bdi->work_list); + return test_bit(BDI_writeback_running, &bdi->state); } static void bdi_queue_work(struct backing_dev_info *bdi, @@ -740,6 +740,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) struct wb_writeback_work *work; long wrote = 0; + set_bit(BDI_writeback_running, &wb->bdi->state); while ((work = get_next_work_item(bdi)) != NULL) { /* * Override sync mode, in case we must wait for completion @@ -766,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); + clear_bit(BDI_writeback_running, &wb->bdi->state); return wrote; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7628219e5386..35b00746c712 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -31,6 +31,7 @@ enum bdi_state { BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ + BDI_writeback_running, /* Writeback is in progress */ BDI_unused, /* Available bits start here */ }; -- cgit v1.2.3