From 2c96ce9f2084c1e04d02883e622f74a537a63aea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 15 Sep 2009 09:43:56 +0200 Subject: fs: remove bdev->bd_inode_backing_dev_info It has been unused since it was introduced in: commit 520808bf20e90fdbdb320264ba7dd5cf9d47dcac Author: Andrew Morton Date: Fri May 21 00:46:17 2004 -0700 [PATCH] block device layer: separate backing_dev_info infrastructure So let's just kill it. Acked-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b21cf6b9c80b..db29588874ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -655,7 +655,6 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct list_head bd_list; - struct backing_dev_info *bd_inode_backing_dev_info; /* * Private data. You must have bd_claim'ed the block_device * to use this. NOTE: bd_claim allows an owner to claim -- cgit v1.2.3 From 1fe06ad89255c211fe100d7f690d10b161398df8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 15 Sep 2009 15:10:20 +0200 Subject: writeback: get rid of wbc->for_writepages It's only set, it's never checked. Kill it. 
Acked-by: Jan Kara Signed-off-by: Jens Axboe --- fs/afs/write.c | 1 - fs/btrfs/ordered-data.c | 1 - fs/jbd2/commit.c | 1 - fs/nfs/write.c | 1 - include/linux/writeback.h | 1 - include/trace/events/ext4.h | 6 ++---- mm/page-writeback.c | 2 -- 7 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/afs/write.c b/fs/afs/write.c index c2e7a7ff0080..c63a3c8beb73 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -712,7 +712,6 @@ int afs_writeback_all(struct afs_vnode *vnode) .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_writepages = 1, .range_cyclic = 1, }; int ret; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682f..7b2f401e604e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -740,7 +740,6 @@ int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, .nr_to_write = mapping->nrpages * 2, .range_start = start, .range_end = end, - .for_writepages = 1, }; return btrfs_writepages(mapping, &wbc); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7b4088b2364d..0df600e9162d 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -220,7 +220,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping) .nr_to_write = mapping->nrpages * 2, .range_start = 0, .range_end = i_size_read(mapping->host), - .for_writepages = 1, }; ret = generic_writepages(mapping, &wbc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 120acadc6a84..53eb26c16b50 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1490,7 +1490,6 @@ static int nfs_write_mapping(struct address_space *mapping, int how) .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, - .for_writepages = 1, }; return __nfs_write_mapping(mapping, &wbc, how); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d347632f1861..48a054e2b716 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -50,7 +50,6 @@ 
struct writeback_control { unsigned encountered_congestion:1; /* An output: a queue is full */ unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_reclaim:1; /* Invoked from the page allocator */ - unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ /* diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7d8b5bc74185..8d433c4e3709 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -227,7 +227,6 @@ TRACE_EVENT(ext4_da_writepages, __field( char, nonblocking ) __field( char, for_kupdate ) __field( char, for_reclaim ) - __field( char, for_writepages ) __field( char, range_cyclic ) ), @@ -241,16 +240,15 @@ TRACE_EVENT(ext4_da_writepages, __entry->nonblocking = wbc->nonblocking; __entry->for_kupdate = wbc->for_kupdate; __entry->for_reclaim = wbc->for_reclaim; - __entry->for_writepages = wbc->for_writepages; __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, __entry->for_kupdate, __entry->for_reclaim, - __entry->for_writepages, __entry->range_cyclic) + __entry->range_cyclic) ); TRACE_EVENT(ext4_da_writepages_result, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index dd73d29c15a8..abc648f5de00 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1020,12 +1020,10 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) if (wbc->nr_to_write <= 0) return 0; - wbc->for_writepages = 1; if 
(mapping->a_ops->writepages) ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); - wbc->for_writepages = 0; return ret; } -- cgit v1.2.3 From 32a88aa1b6dfb901cec64e1898cac78d0f25028a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Sep 2009 15:02:33 +0200 Subject: fs: Assign bdi in super_block We do this automatically in get_sb_bdev() from the set_bdev_super() callback. Filesystems that have their own private backing_dev_info must assign that in ->fill_super(). Note that ->s_bdi assignment is required for proper writeback! Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/btrfs/disk-io.c | 1 + fs/fuse/inode.c | 2 ++ fs/nfs/super.c | 2 ++ fs/super.c | 6 ++++++ fs/sync.c | 9 ++++++++- fs/ubifs/super.c | 1 + include/linux/fs.h | 1 + 7 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 15831d5c7367..8b8192790011 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1600,6 +1600,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); + sb->s_bdi = &fs_info->bdi; /* * we set the i_size on the btree inode to the max possible int. 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4567db6f9430..e5dbecd87b0f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -894,6 +894,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_put_conn; + sb->s_bdi = &fc->bdi; + /* Handle umasking inside the fuse code */ if (sb->s_flags & MS_POSIXACL) fc->dont_mask = 1; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 867f70504531..de935692d40d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1918,6 +1918,8 @@ static inline void nfs_initialise_sb(struct super_block *sb) if (server->flags & NFS_MOUNT_NOAC) sb->s_flags |= MS_SYNCHRONOUS; + sb->s_bdi = &server->backing_dev_info; + nfs_super_set_maxbytes(sb, server->maxfilesize); } diff --git a/fs/super.c b/fs/super.c index 9cda337ddae2..b03fea8fbfb6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -707,6 +707,12 @@ static int set_bdev_super(struct super_block *s, void *data) { s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; + + /* + * We set the bdi here to the queue backing, file systems can + * overwrite this in ->fill_super() + */ + s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/sync.c b/fs/sync.c index 192340930bb4..c08467a5d7cb 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,6 +27,13 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { + /* + * This should be safe, as we require bdi backing to actually + * write out data in the first place + */ + if (!sb->s_bdi) + return 0; + /* Avoid doing twice syncing and cache pruning for quota sync */ if (!wait) { writeout_quota_sb(sb, -1); @@ -101,7 +108,7 @@ restart: spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY) && sb->s_root) + if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi) __sync_filesystem(sb, wait); up_read(&sb->s_umount); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 51763aa8f4de..c4af069df1ad 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1980,6 
+1980,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_bdi; + sb->s_bdi = &c->bdi; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; diff --git a/include/linux/fs.h b/include/linux/fs.h index db29588874ac..90162fb3bf04 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1342,6 +1342,7 @@ struct super_block { int s_nr_dentry_unused; /* # of dentry on lru */ struct block_device *s_bdev; + struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct list_head s_instances; struct quota_info s_dquot; /* Diskquota specific options */ -- cgit v1.2.3 From cfc4ba5365449cb6b5c9f68d755a142f17da1e47 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Sep 2009 13:12:40 +0200 Subject: writeback: use RCU to protect bdi_list Now that bdi_writeback_all() no longer handles integrity writeback, it doesn't have to block anymore. This means that we can switch bdi_list reader side protection to RCU. Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 6 ++-- include/linux/backing-dev.h | 1 + mm/backing-dev.c | 76 ++++++++++++++++++++++++++++++++------------- mm/page-writeback.c | 8 ++--- 4 files changed, 63 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 14f06b459197..f8cd7a97f5b7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -868,16 +868,16 @@ static void bdi_writeback_all(struct writeback_control *wbc) WARN_ON(wbc->sync_mode == WB_SYNC_ALL); - spin_lock(&bdi_lock); + rcu_read_lock(); - list_for_each_entry(bdi, &bdi_list, bdi_list) { + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; bdi_alloc_queue_work(bdi, wbc); } - spin_unlock(&bdi_lock); + rcu_read_unlock(); } /* diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f169bcb90b58..859e797f4576 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -59,6 +59,7 @@ struct 
bdi_writeback { struct backing_dev_info { struct list_head bdi_list; + struct rcu_head rcu_head; unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long state; /* Always use atomic bitops on this */ unsigned int capabilities; /* Device capabilities */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca0dac1111..fd93566345b6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -26,6 +26,12 @@ struct backing_dev_info default_backing_dev_info = { EXPORT_SYMBOL_GPL(default_backing_dev_info); static struct class *bdi_class; + +/* + * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as + * reader side protection for bdi_pending_list. bdi_list has RCU reader side + * locking. + */ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); LIST_HEAD(bdi_pending_list); @@ -284,9 +290,9 @@ static int bdi_start_fn(void *ptr) /* * Add us to the active bdi_list */ - spin_lock(&bdi_lock); - list_add(&bdi->bdi_list, &bdi_list); - spin_unlock(&bdi_lock); + spin_lock_bh(&bdi_lock); + list_add_rcu(&bdi->bdi_list, &bdi_list); + spin_unlock_bh(&bdi_lock); bdi_task_init(bdi, wb); @@ -389,7 +395,7 @@ static int bdi_forker_task(void *ptr) if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) wb_do_writeback(me, 0); - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); /* * Check if any existing bdi's have dirty data without @@ -410,7 +416,7 @@ static int bdi_forker_task(void *ptr) if (list_empty(&bdi_pending_list)) { unsigned long wait; - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); wait = msecs_to_jiffies(dirty_writeback_interval * 10); schedule_timeout(wait); try_to_freeze(); @@ -426,7 +432,7 @@ static int bdi_forker_task(void *ptr) bdi = list_entry(bdi_pending_list.next, struct backing_dev_info, bdi_list); list_del_init(&bdi->bdi_list); - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); wb = &bdi->wb; wb->task = kthread_run(bdi_start_fn, wb, "flush-%s", @@ -445,9 +451,9 @@ static int bdi_forker_task(void *ptr) * a chance to 
flush other bdi's to free * memory. */ - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); list_add_tail(&bdi->bdi_list, &bdi_pending_list); - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); bdi_flush_io(bdi); } @@ -456,6 +462,24 @@ static int bdi_forker_task(void *ptr) return 0; } +static void bdi_add_to_pending(struct rcu_head *head) +{ + struct backing_dev_info *bdi; + + bdi = container_of(head, struct backing_dev_info, rcu_head); + INIT_LIST_HEAD(&bdi->bdi_list); + + spin_lock(&bdi_lock); + list_add_tail(&bdi->bdi_list, &bdi_pending_list); + spin_unlock(&bdi_lock); + + /* + * We are now on the pending list, wake up bdi_forker_task() + * to finish the job and add us back to the active bdi_list + */ + wake_up_process(default_backing_dev_info.wb.task); +} + /* * Add the default flusher task that gets created for any bdi * that has dirty data pending writeout @@ -478,16 +502,29 @@ void static bdi_add_default_flusher_task(struct backing_dev_info *bdi) * waiting for previous additions to finish. */ if (!test_and_set_bit(BDI_pending, &bdi->state)) { - list_move_tail(&bdi->bdi_list, &bdi_pending_list); + list_del_rcu(&bdi->bdi_list); /* - * We are now on the pending list, wake up bdi_forker_task() - * to finish the job and add us back to the active bdi_list + * We must wait for the current RCU period to end before + * moving to the pending list. So schedule that operation + * from an RCU callback. */ - wake_up_process(default_backing_dev_info.wb.task); + call_rcu(&bdi->rcu_head, bdi_add_to_pending); } } +/* + * Remove bdi from bdi_list, and ensure that it is no longer visible + */ +static void bdi_remove_from_list(struct backing_dev_info *bdi) +{ + spin_lock_bh(&bdi_lock); + list_del_rcu(&bdi->bdi_list); + spin_unlock_bh(&bdi_lock); + + synchronize_rcu(); +} + int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) 
{ @@ -506,9 +543,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, goto exit; } - spin_lock(&bdi_lock); - list_add_tail(&bdi->bdi_list, &bdi_list); - spin_unlock(&bdi_lock); + spin_lock_bh(&bdi_lock); + list_add_tail_rcu(&bdi->bdi_list, &bdi_list); + spin_unlock_bh(&bdi_lock); bdi->dev = dev; @@ -526,9 +563,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, wb->task = NULL; ret = -ENOMEM; - spin_lock(&bdi_lock); - list_del(&bdi->bdi_list); - spin_unlock(&bdi_lock); + bdi_remove_from_list(bdi); goto exit; } } @@ -565,9 +600,7 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Make sure nobody finds us on the bdi_list anymore */ - spin_lock(&bdi_lock); - list_del(&bdi->bdi_list); - spin_unlock(&bdi_lock); + bdi_remove_from_list(bdi); /* * Finally, kill the kernel threads. We don't need to be RCU @@ -599,6 +632,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->max_ratio = 100; bdi->max_prop_frac = PROP_FRAC_BASE; spin_lock_init(&bdi->wb_lock); + INIT_RCU_HEAD(&bdi->rcu_head); INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); INIT_LIST_HEAD(&bdi->work_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index abc648f5de00..12c3d843ce93 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -315,7 +315,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { int ret = 0; - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { @@ -327,7 +327,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) ret = -EINVAL; } } - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); return ret; } @@ -339,14 +339,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; - spin_lock(&bdi_lock); + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; } else { bdi->max_ratio = max_ratio; bdi->max_prop_frac = 
(PROP_FRAC_BASE * max_ratio) / 100; } - spin_unlock(&bdi_lock); + spin_unlock_bh(&bdi_lock); return ret; } -- cgit v1.2.3 From b6e51316daede0633e9274e1e30391cfa4747877 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Sep 2009 15:13:54 +0200 Subject: writeback: separate starting of sync vs opportunistic writeback bdi_start_writeback() is currently split into two paths, one for WB_SYNC_NONE and one for WB_SYNC_ALL. Add bdi_sync_writeback() for WB_SYNC_ALL writeback and let bdi_start_writeback() handle only WB_SYNC_NONE. Push down the writeback_control allocation and only accept the parameters that make sense for each function. This cleans up the API considerably. Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 132 ++++++++++++++++++++++---------------------- fs/ubifs/budget.c | 20 +------ include/linux/backing-dev.h | 2 +- include/linux/writeback.h | 4 +- mm/page-writeback.c | 12 +--- 5 files changed, 75 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59b3ee63b624..5887328b5a06 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -74,14 +74,10 @@ static inline bool bdi_work_on_stack(struct bdi_work *work) } static inline void bdi_work_init(struct bdi_work *work, - struct writeback_control *wbc) + struct wb_writeback_args *args) { INIT_RCU_HEAD(&work->rcu_head); - work->args.sb = wbc->sb; - work->args.nr_pages = wbc->nr_to_write; - work->args.sync_mode = wbc->sync_mode; - work->args.range_cyclic = wbc->range_cyclic; - work->args.for_kupdate = 0; + work->args = *args; work->state = WS_USED; } @@ -194,7 +190,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) } static void bdi_alloc_queue_work(struct backing_dev_info *bdi, - struct writeback_control *wbc) + struct wb_writeback_args *args) { struct bdi_work *work; @@ -204,7 +200,7 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, */ work = kmalloc(sizeof(*work), GFP_ATOMIC); if (work) { - bdi_work_init(work, 
wbc); + bdi_work_init(work, args); bdi_queue_work(bdi, work); } else { struct bdi_writeback *wb = &bdi->wb; @@ -214,24 +210,54 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, } } -void bdi_start_writeback(struct writeback_control *wbc) +/** + * bdi_sync_writeback - start and wait for writeback + * @bdi: the backing device to write from + * @sb: write inodes from this super_block + * + * Description: + * This does WB_SYNC_ALL data integrity writeback and waits for the + * IO to complete. Callers must hold the sb s_umount semaphore for + * reading, to avoid having the super disappear before we are done. + */ +static void bdi_sync_writeback(struct backing_dev_info *bdi, + struct super_block *sb) { - /* - * WB_SYNC_NONE is opportunistic writeback. If this allocation fails, - * bdi_queue_work() will wake up the thread and flush old data. This - * should ensure some amount of progress in freeing memory. - */ - if (wbc->sync_mode != WB_SYNC_ALL) - bdi_alloc_queue_work(wbc->bdi, wbc); - else { - struct bdi_work work; + struct wb_writeback_args args = { + .sb = sb, + .sync_mode = WB_SYNC_ALL, + .nr_pages = LONG_MAX, + .range_cyclic = 0, + }; + struct bdi_work work; - bdi_work_init(&work, wbc); - work.state |= WS_ONSTACK; + bdi_work_init(&work, &args); + work.state |= WS_ONSTACK; - bdi_queue_work(wbc->bdi, &work); - bdi_wait_on_work_clear(&work); - } + bdi_queue_work(bdi, &work); + bdi_wait_on_work_clear(&work); +} + +/** + * bdi_start_writeback - start writeback + * @bdi: the backing device to write from + * @nr_pages: the number of pages to write + * + * Description: + * This does WB_SYNC_NONE opportunistic writeback. The IO is only + * started when this function returns, we make no guarantees on + * completion. Caller need not hold sb s_umount semaphore. 
+ * + */ +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) +{ + struct wb_writeback_args args = { + .sync_mode = WB_SYNC_NONE, + .nr_pages = nr_pages, + .range_cyclic = 1, + }; + + bdi_alloc_queue_work(bdi, &args); } /* @@ -863,23 +889,25 @@ int bdi_writeback_task(struct bdi_writeback *wb) } /* - * Schedule writeback for all backing devices. Can only be used for - * WB_SYNC_NONE writeback, WB_SYNC_ALL should use bdi_start_writeback() - * and pass in the superblock. + * Schedule writeback for all backing devices. This does WB_SYNC_NONE + * writeback, for integrity writeback see bdi_sync_writeback(). */ -static void bdi_writeback_all(struct writeback_control *wbc) +static void bdi_writeback_all(struct super_block *sb, long nr_pages) { + struct wb_writeback_args args = { + .sb = sb, + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + }; struct backing_dev_info *bdi; - WARN_ON(wbc->sync_mode == WB_SYNC_ALL); - rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { if (!bdi_has_dirty_io(bdi)) continue; - bdi_alloc_queue_work(bdi, wbc); + bdi_alloc_queue_work(bdi, &args); } rcu_read_unlock(); @@ -891,17 +919,10 @@ static void bdi_writeback_all(struct writeback_control *wbc) */ void wakeup_flusher_threads(long nr_pages) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .range_cyclic = 1, - }; - if (nr_pages == 0) nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); - wbc.nr_to_write = nr_pages; - bdi_writeback_all(&wbc); + bdi_writeback_all(NULL, nr_pages); } static noinline void block_dump___mark_inode_dirty(struct inode *inode) @@ -1048,7 +1069,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); * on the writer throttling path, and we get decent balancing between many * throttled threads: we don't want them all piling up on inode_sync_wait. 
*/ -static void wait_sb_inodes(struct writeback_control *wbc) +static void wait_sb_inodes(struct super_block *sb) { struct inode *inode, *old_inode = NULL; @@ -1056,7 +1077,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) * We need to be protected against the filesystem going from * r/o to r/w or vice versa. */ - WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount)); + WARN_ON(!rwsem_is_locked(&sb->s_umount)); spin_lock(&inode_lock); @@ -1067,7 +1088,7 @@ static void wait_sb_inodes(struct writeback_control *wbc) * In which case, the inode may not be on the dirty list, but * we still have to wait for that writeout. */ - list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) { + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct address_space *mapping; if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) @@ -1107,14 +1128,8 @@ static void wait_sb_inodes(struct writeback_control *wbc) * for IO completion of submitted IO. The number of pages submitted is * returned. */ -long writeback_inodes_sb(struct super_block *sb) +void writeback_inodes_sb(struct super_block *sb) { - struct writeback_control wbc = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .range_start = 0, - .range_end = LLONG_MAX, - }; unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); long nr_to_write; @@ -1122,9 +1137,7 @@ long writeback_inodes_sb(struct super_block *sb) nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused); - wbc.nr_to_write = nr_to_write; - bdi_writeback_all(&wbc); - return nr_to_write - wbc.nr_to_write; + bdi_writeback_all(sb, nr_to_write); } EXPORT_SYMBOL(writeback_inodes_sb); @@ -1135,21 +1148,10 @@ EXPORT_SYMBOL(writeback_inodes_sb); * This function writes and waits on any dirty inode belonging to this * super_block. The number of pages synced is returned. 
*/ -long sync_inodes_sb(struct super_block *sb) +void sync_inodes_sb(struct super_block *sb) { - struct writeback_control wbc = { - .sb = sb, - .bdi = sb->s_bdi, - .sync_mode = WB_SYNC_ALL, - .range_start = 0, - .range_end = LLONG_MAX, - }; - long nr_to_write = LONG_MAX; /* doesn't actually matter */ - - wbc.nr_to_write = nr_to_write; - bdi_start_writeback(&wbc); - wait_sb_inodes(&wbc); - return nr_to_write - wbc.nr_to_write; + bdi_sync_writeback(sb->s_bdi, sb); + wait_sb_inodes(sb); } EXPORT_SYMBOL(sync_inodes_sb); diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 1c8991b0db13..ee1ce68fd98b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -54,29 +54,15 @@ * @nr_to_write: how many dirty pages to write-back * * This function shrinks UBIFS liability by means of writing back some amount - * of dirty inodes and their pages. Returns the amount of pages which were - * written back. The returned value does not include dirty inodes which were - * synchronized. + * of dirty inodes and their pages. * * Note, this function synchronizes even VFS inodes which are locked * (@i_mutex) by the caller of the budgeting function, because write-back does * not touch @i_mutex. */ -static int shrink_liability(struct ubifs_info *c, int nr_to_write) +static void shrink_liability(struct ubifs_info *c, int nr_to_write) { - int nr_written; - - nr_written = writeback_inodes_sb(c->vfs_sb); - if (!nr_written) { - /* - * Re-try again but wait on pages/inodes which are being - * written-back concurrently (e.g., by pdflush). 
- */ - nr_written = sync_inodes_sb(c->vfs_sb); - } - - dbg_budg("%d pages were written back", nr_written); - return nr_written; + writeback_inodes_sb(c->vfs_sb); } /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 859e797f4576..0ee33c2e6129 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -101,7 +101,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); -void bdi_start_writeback(struct writeback_control *wbc); +void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 48a054e2b716..75cf58666ff9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -68,8 +68,8 @@ struct writeback_control { */ struct bdi_writeback; int inode_wait(void *); -long writeback_inodes_sb(struct super_block *); -long sync_inodes_sb(struct super_block *); +void writeback_inodes_sb(struct super_block *); +void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 12c3d843ce93..1eea4fa0d410 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -582,16 +582,8 @@ static void balance_dirty_pages(struct address_space *mapping) if ((laptop_mode && pages_written) || (!laptop_mode && ((nr_writeback = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) - > background_thresh))) { - struct writeback_control wbc = { - .bdi = bdi, - .sync_mode = WB_SYNC_NONE, - .nr_to_write = nr_writeback, - }; - - - bdi_start_writeback(&wbc); - } + > background_thresh))) 
+ bdi_start_writeback(bdi, nr_writeback); } void set_page_dirty_balance(struct page *page, int page_mkwrite) -- cgit v1.2.3