summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/disk-io.c76
-rw-r--r--fs/btrfs/extent_io.c497
-rw-r--r--fs/btrfs/extent_io.h24
-rw-r--r--fs/btrfs/inode.c2
4 files changed, 390 insertions, 209 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0ba055e03eb8..c54aec87e89d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -333,7 +333,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
0, &cached_state, GFP_NOFS);
- if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
+ if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
goto out;
@@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
(unsigned long long)parent_transid,
(unsigned long long)btrfs_header_generation(eb));
ret = 1;
- clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
+ clear_extent_buffer_uptodate(eb);
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
@@ -566,7 +566,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
tree = &BTRFS_I(page->mapping->host)->io_tree;
eb = (struct extent_buffer *)page->private;
- reads_done = atomic_dec_and_test(&eb->pages_reading);
+ /* the pending IO might have been the only thing that kept this buffer
+ * in memory. Make sure we have a ref for all this other checks
+ */
+ extent_buffer_get(eb);
+
+ reads_done = atomic_dec_and_test(&eb->io_pages);
if (!reads_done)
goto err;
@@ -606,14 +611,17 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
ret = -EIO;
}
+ if (!ret)
+ set_extent_buffer_uptodate(eb);
err:
if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
btree_readahead_hook(root, eb, eb->start, ret);
}
- if (ret && eb)
- clear_extent_buffer_uptodate(tree, eb, NULL);
+ if (ret)
+ clear_extent_buffer_uptodate(eb);
+ free_extent_buffer(eb);
out:
return ret;
}
@@ -878,20 +886,6 @@ static int btree_migratepage(struct address_space *mapping,
}
#endif
-static int btree_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- if (!(current->flags & PF_MEMALLOC)) {
- return extent_write_full_page(tree, page,
- btree_get_extent, wbc);
- }
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
@@ -911,7 +905,7 @@ static int btree_writepages(struct address_space *mapping,
if (num_dirty < thresh)
return 0;
}
- return extent_writepages(tree, mapping, btree_get_extent, wbc);
+ return btree_write_cache_pages(mapping, wbc);
}
static int btree_readpage(struct file *file, struct page *page)
@@ -950,15 +944,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
}
}
+static int btree_set_page_dirty(struct page *page)
+{
+ struct extent_buffer *eb;
+
+ BUG_ON(!PagePrivate(page));
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
+ BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(!atomic_read(&eb->refs));
+ btrfs_assert_tree_locked(eb);
+ return __set_page_dirty_nobuffers(page);
+}
+
static const struct address_space_operations btree_aops = {
.readpage = btree_readpage,
- .writepage = btree_writepage,
.writepages = btree_writepages,
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,
#endif
+ .set_page_dirty = btree_set_page_dirty,
};
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1001,7 +1008,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
free_extent_buffer(buf);
return -EIO;
- } else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
+ } else if (extent_buffer_uptodate(buf)) {
*eb = buf;
} else {
free_extent_buffer(buf);
@@ -1054,9 +1061,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
return NULL;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
-
- if (ret == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
return buf;
}
@@ -1064,7 +1068,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf)
{
- struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
@@ -1080,8 +1083,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
- clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ clear_extent_buffer_dirty(buf);
}
return 0;
}
@@ -1948,6 +1950,7 @@ int open_ctree(struct super_block *sb,
RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
fs_info->btree_inode->i_mapping);
+ BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
@@ -3058,8 +3061,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
int ret;
struct inode *btree_inode = buf->pages[0]->mapping->host;
- ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
- NULL);
+ ret = extent_buffer_uptodate(buf);
if (!ret)
return ret;
@@ -3070,16 +3072,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
- struct inode *btree_inode = buf->pages[0]->mapping->host;
- return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ return set_extent_buffer_uptodate(buf);
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
u64 transid = btrfs_header_generation(buf);
- struct inode *btree_inode = root->fs_info->btree_inode;
int was_dirty;
btrfs_assert_tree_locked(buf);
@@ -3091,8 +3090,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
(unsigned long long)root->fs_info->generation);
WARN_ON(1);
}
- was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
- buf);
+ was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += buf->len;
@@ -3147,11 +3145,7 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
- int ret;
- ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
- if (ret == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
- return ret;
+ return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
static int btree_lock_page_hook(struct page *page, void *data,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4a97d8fd958d..c1b898d590d7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -19,6 +19,7 @@
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
+#include "locking.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -53,6 +54,8 @@ struct extent_page_data {
unsigned int sync_io:1;
};
+static noinline void flush_write_bio(void *data);
+
int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("extent_state",
@@ -2337,7 +2340,7 @@ error_handled:
}
}
- if (uptodate) {
+ if (uptodate && tree->track_uptodate) {
set_extent_uptodate(tree, start, end, &cached,
GFP_ATOMIC);
}
@@ -2973,6 +2976,275 @@ done_unlocked:
return 0;
}
+static int eb_wait(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
+static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static int lock_extent_buffer_for_io(struct extent_buffer *eb,
+ struct btrfs_fs_info *fs_info,
+ struct extent_page_data *epd)
+{
+ unsigned long i, num_pages;
+ int flush = 0;
+ int ret = 0;
+
+ if (!btrfs_try_tree_write_lock(eb)) {
+ flush = 1;
+ flush_write_bio(epd);
+ btrfs_tree_lock(eb);
+ }
+
+ if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
+ btrfs_tree_unlock(eb);
+ if (!epd->sync_io)
+ return 0;
+ if (!flush) {
+ flush_write_bio(epd);
+ flush = 1;
+ }
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_lock(eb);
+ if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
+ printk(KERN_ERR "Um, ok?\n");
+ btrfs_tree_unlock(eb);
+ return 0;
+ }
+ }
+
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+ set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+ spin_lock(&fs_info->delalloc_lock);
+ if (fs_info->dirty_metadata_bytes >= eb->len)
+ fs_info->dirty_metadata_bytes -= eb->len;
+ else
+ WARN_ON(1);
+ spin_unlock(&fs_info->delalloc_lock);
+ ret = 1;
+ }
+
+ btrfs_tree_unlock(eb);
+
+ if (!ret)
+ return ret;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+
+ if (!trylock_page(p)) {
+ if (!flush) {
+ flush_write_bio(epd);
+ flush = 1;
+ }
+ lock_page(p);
+ }
+ }
+
+ return ret;
+}
+
+static void end_extent_buffer_writeback(struct extent_buffer *eb)
+{
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
+}
+
+static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
+{
+ int uptodate = err == 0;
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct extent_buffer *eb;
+ int done;
+
+ do {
+ struct page *page = bvec->bv_page;
+
+ bvec--;
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
+ done = atomic_dec_and_test(&eb->io_pages);
+
+ if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+ set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+
+ end_page_writeback(page);
+
+ if (!done)
+ continue;
+
+ end_extent_buffer_writeback(eb);
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+
+}
+
+static int write_one_eb(struct extent_buffer *eb,
+ struct btrfs_fs_info *fs_info,
+ struct writeback_control *wbc,
+ struct extent_page_data *epd)
+{
+ struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+ u64 offset = eb->start;
+ unsigned long i, num_pages;
+ int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+ int ret;
+
+ clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ num_pages = num_extent_pages(eb->start, eb->len);
+ atomic_set(&eb->io_pages, num_pages);
+ for (i = 0; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+
+ clear_page_dirty_for_io(p);
+ set_page_writeback(p);
+ ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+ PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
+ -1, end_bio_extent_buffer_writepage,
+ 0, 0, 0);
+ if (ret) {
+ set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+ SetPageError(p);
+ if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
+ end_extent_buffer_writeback(eb);
+ ret = -EIO;
+ break;
+ }
+ offset += PAGE_CACHE_SIZE;
+ update_nr_written(p, wbc, 1);
+ unlock_page(p);
+ }
+
+ if (unlikely(ret)) {
+ for (; i < num_pages; i++) {
+ struct page *p = extent_buffer_page(eb, i);
+ unlock_page(p);
+ }
+ }
+
+ return ret;
+}
+
+int btree_write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
+ struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
+ struct extent_buffer *eb, *prev_eb = NULL;
+ struct extent_page_data epd = {
+ .bio = NULL,
+ .tree = tree,
+ .extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+ };
+ int ret = 0;
+ int done = 0;
+ int nr_to_write_done = 0;
+ struct pagevec pvec;
+ int nr_pages;
+ pgoff_t index;
+ pgoff_t end; /* Inclusive */
+ int scanned = 0;
+ int tag;
+
+ pagevec_init(&pvec, 0);
+ if (wbc->range_cyclic) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ end = -1;
+ } else {
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ scanned = 1;
+ }
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+ while (!done && !nr_to_write_done && (index <= end) &&
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ unsigned i;
+
+ scanned = 1;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ if (!PagePrivate(page))
+ continue;
+
+ if (!wbc->range_cyclic && page->index > end) {
+ done = 1;
+ break;
+ }
+
+ eb = (struct extent_buffer *)page->private;
+ if (!eb) {
+ WARN_ON(1);
+ continue;
+ }
+
+ if (eb == prev_eb)
+ continue;
+
+ if (!atomic_inc_not_zero(&eb->refs)) {
+ WARN_ON(1);
+ continue;
+ }
+
+ prev_eb = eb;
+ ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
+ if (!ret) {
+ free_extent_buffer(eb);
+ continue;
+ }
+
+ ret = write_one_eb(eb, fs_info, wbc, &epd);
+ if (ret) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
+ }
+ free_extent_buffer(eb);
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ if (!scanned && !done) {
+ /*
+ * We hit the last page and there is more work to be done: wrap
+ * back to the start of the file
+ */
+ scanned = 1;
+ index = 0;
+ goto retry;
+ }
+ flush_write_bio(&epd);
+ return ret;
+}
+
/**
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write
@@ -3609,7 +3881,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
#endif
spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
- atomic_set(&eb->pages_reading, 0);
+ atomic_set(&eb->io_pages, 0);
if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
struct page **pages;
@@ -3628,6 +3900,13 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return eb;
}
+static int extent_buffer_under_io(struct extent_buffer *eb)
+{
+ return (atomic_read(&eb->io_pages) ||
+ test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+}
+
/*
* Helper for releasing extent buffer page.
*/
@@ -3637,6 +3916,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long index;
struct page *page;
+ BUG_ON(extent_buffer_under_io(eb));
+
index = num_extent_pages(eb->start, eb->len);
if (start_idx >= index)
return;
@@ -3655,6 +3936,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
*/
if (PagePrivate(page) &&
page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(PageDirty(page));
BUG_ON(PageWriteback(page));
/*
@@ -3683,10 +3965,41 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
__free_extent_buffer(eb);
}
+static void check_buffer_tree_ref(struct extent_buffer *eb)
+{
+ /* the ref bit is tricky. We have to make sure it is set
+ * if we have the buffer dirty. Otherwise the
+ * code to free a buffer can end up dropping a dirty
+ * page
+ *
+ * Once the ref bit is set, it won't go away while the
+ * buffer is dirty or in writeback, and it also won't
+ * go away while we have the reference count on the
+ * eb bumped.
+ *
+ * We can't just set the ref bit without bumping the
+ * ref on the eb because free_extent_buffer might
+ * see the ref bit and try to clear it. If this happens
+ * free_extent_buffer might end up dropping our original
+ * ref by mistake and freeing the page before we are able
+ * to add one more ref.
+ *
+ * So bump the ref count first, then set the bit. If someone
+ * beat us to it, drop the ref we added.
+ */
+ if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+ atomic_inc(&eb->refs);
+ if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+ }
+}
+
static void mark_extent_buffer_accessed(struct extent_buffer *eb)
{
unsigned long num_pages, i;
+ check_buffer_tree_ref(eb);
+
num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
struct page *p = extent_buffer_page(eb, i);
@@ -3744,15 +4057,17 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
goto free_eb;
}
- /*
+ /*
* Do this so attach doesn't complain and we need to
* drop the ref the old guy had.
*/
ClearPagePrivate(p);
+ WARN_ON(PageDirty(p));
page_cache_release(p);
}
attach_extent_buffer_page(eb, p);
spin_unlock(&mapping->private_lock);
+ WARN_ON(PageDirty(p));
mark_page_accessed(p);
eb->pages[i] = p;
if (!PageUptodate(p))
@@ -3788,8 +4103,7 @@ again:
}
/* add one reference for the tree */
spin_lock(&eb->refs_lock);
- atomic_inc(&eb->refs);
- set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+ check_buffer_tree_ref(eb);
spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
@@ -3849,90 +4163,15 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
__free_extent_buffer(eb);
}
-static int extent_buffer_under_io(struct extent_buffer *eb,
- struct page *locked_page)
-{
- unsigned long num_pages, i;
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *page = eb->pages[i];
- int need_unlock = 0;
-
- if (!page)
- continue;
-
- if (page != locked_page) {
- if (!trylock_page(page))
- return 1;
- need_unlock = 1;
- }
-
- if (PageDirty(page) || PageWriteback(page)) {
- if (need_unlock)
- unlock_page(page);
- return 1;
- }
- if (need_unlock)
- unlock_page(page);
- }
-
- return 0;
-}
-
/* Expects to have eb->eb_lock already held */
static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
struct extent_io_tree *tree = eb->tree;
- int ret;
spin_unlock(&eb->refs_lock);
- might_sleep_if(mask & __GFP_WAIT);
- ret = clear_extent_bit(tree, eb->start,
- eb->start + eb->len - 1, -1, 0, 0,
- NULL, mask);
- if (ret < 0) {
- unsigned long num_pages, i;
-
- num_pages = num_extent_pages(eb->start, eb->len);
- /*
- * We failed to clear the state bits which likely means
- * ENOMEM, so just re-up the eb ref and continue, we
- * will get freed later on via releasepage or something
- * else and will be ok.
- */
- spin_lock(&eb->tree->mapping->private_lock);
- spin_lock(&eb->refs_lock);
- set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
- atomic_inc(&eb->refs);
-
- /*
- * We may have started to reclaim the pages for a newly
- * allocated eb, make sure we own all of them again.
- */
- for (i = 0; i < num_pages; i++) {
- struct page *page = eb->pages[i];
-
- if (!page) {
- WARN_ON(1);
- continue;
- }
-
- BUG_ON(!PagePrivate(page));
- if (page->private != (unsigned long)eb) {
- ClearPagePrivate(page);
- page_cache_release(page);
- attach_extent_buffer_page(eb, page);
- }
- }
- spin_unlock(&eb->refs_lock);
- spin_unlock(&eb->tree->mapping->private_lock);
- return;
- }
-
spin_lock(&tree->buffer_lock);
radix_tree_delete(&tree->buffer,
eb->start >> PAGE_CACHE_SHIFT);
@@ -3955,7 +4194,7 @@ void free_extent_buffer(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
- !extent_buffer_under_io(eb, NULL) &&
+ !extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_dec(&eb->refs);
@@ -3974,20 +4213,20 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
- if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
+ if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_dec(&eb->refs);
release_extent_buffer(eb, GFP_NOFS);
}
-int clear_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+int clear_extent_buffer_dirty(struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
struct page *page;
num_pages = num_extent_pages(eb->start, eb->len);
+ WARN_ON(atomic_read(&eb->refs) == 0);
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
@@ -4008,25 +4247,30 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
ClearPageError(page);
unlock_page(page);
}
+ WARN_ON(atomic_read(&eb->refs) == 0);
return 0;
}
-int set_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+int set_extent_buffer_dirty(struct extent_buffer *eb)
{
unsigned long i;
unsigned long num_pages;
int was_dirty = 0;
+ check_buffer_tree_ref(eb);
+
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+
num_pages = num_extent_pages(eb->start, eb->len);
WARN_ON(atomic_read(&eb->refs) == 0);
+ WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+
for (i = 0; i < num_pages; i++)
- __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+ set_page_dirty(extent_buffer_page(eb, i));
return was_dirty;
}
-static int __eb_straddles_pages(u64 start, u64 len)
+static int range_straddles_pages(u64 start, u64 len)
{
if (len < PAGE_CACHE_SIZE)
return 1;
@@ -4037,25 +4281,14 @@ static int __eb_straddles_pages(u64 start, u64 len)
return 0;
}
-static int eb_straddles_pages(struct extent_buffer *eb)
-{
- return __eb_straddles_pages(eb->start, eb->len);
-}
-
-int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state **cached_state)
+int clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
unsigned long num_pages;
- num_pages = num_extent_pages(eb->start, eb->len);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-
- clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- cached_state, GFP_NOFS);
-
+ num_pages = num_extent_pages(eb->start, eb->len);
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (page)
@@ -4064,27 +4297,16 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
return 0;
}
-int set_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb)
+int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
unsigned long num_pages;
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
-
- if (eb_straddles_pages(eb)) {
- set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- NULL, GFP_NOFS);
- }
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
- if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
- ((i == num_pages - 1) &&
- ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
- check_page_uptodate(tree, page);
- continue;
- }
SetPageUptodate(page);
}
return 0;
@@ -4099,7 +4321,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
int uptodate;
unsigned long index;
- if (__eb_straddles_pages(start, end - start + 1)) {
+ if (range_straddles_pages(start, end - start + 1)) {
ret = test_range_bit(tree, start, end,
EXTENT_UPTODATE, 1, NULL);
if (ret)
@@ -4121,35 +4343,9 @@ int extent_range_uptodate(struct extent_io_tree *tree,
return pg_uptodate;
}
-int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state *cached_state)
+int extent_buffer_uptodate(struct extent_buffer *eb)
{
- int ret = 0;
- unsigned long num_pages;
- unsigned long i;
- struct page *page;
- int pg_uptodate = 1;
-
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return 1;
-
- if (eb_straddles_pages(eb)) {
- ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, cached_state);
- if (ret)
- return ret;
- }
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- pg_uptodate = 0;
- break;
- }
- }
- return pg_uptodate;
+ return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}
int read_extent_buffer_pages(struct extent_io_tree *tree,
@@ -4171,13 +4367,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
- if (eb_straddles_pages(eb)) {
- if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1, NULL)) {
- return 0;
- }
- }
-
if (start) {
WARN_ON(start < eb->start);
start_i = (start >> PAGE_CACHE_SHIFT) -
@@ -4207,7 +4396,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
goto unlock_exit;
}
- atomic_set(&eb->pages_reading, num_reads);
+ atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (!PageUptodate(page)) {
@@ -4235,8 +4424,6 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ret = -EIO;
}
- if (!ret)
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
return ret;
unlock_exit:
@@ -4604,13 +4791,13 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
eb = (struct extent_buffer *)page->private;
BUG_ON(!eb);
- /*
+ /*
* This is a little awful but should be ok, we need to make sure that
* the eb doesn't disappear out from under us while we're looking at
* this page.
*/
spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
+ if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
spin_unlock(&eb->refs_lock);
spin_unlock(&page->mapping->private_lock);
return 0;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 60628341f156..489d7945154f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -37,6 +37,8 @@
#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
#define EXTENT_BUFFER_TREE_REF 5
#define EXTENT_BUFFER_STALE 6
+#define EXTENT_BUFFER_WRITEBACK 7
+#define EXTENT_BUFFER_IOERR 8
/* these are flags for extent_clear_unlock_delalloc */
#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -99,6 +101,7 @@ struct extent_io_tree {
struct radix_tree_root buffer;
struct address_space *mapping;
u64 dirty_bytes;
+ int track_uptodate;
spinlock_t lock;
spinlock_t buffer_lock;
struct extent_io_ops *ops;
@@ -132,7 +135,7 @@ struct extent_buffer {
struct extent_io_tree *tree;
spinlock_t refs_lock;
atomic_t refs;
- atomic_t pages_reading;
+ atomic_t io_pages;
struct list_head leak_list;
struct rcu_head rcu_head;
pid_t lock_owner;
@@ -249,6 +252,8 @@ int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc);
+int btree_write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages,
@@ -297,18 +302,11 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
-int clear_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state **cached_state);
-int extent_buffer_uptodate(struct extent_io_tree *tree,
- struct extent_buffer *eb,
- struct extent_state *cached_state);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_buffer *eb);
+int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_buffer *eb);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb268193d85d..341a8670165f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6782,6 +6782,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data);
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
+ ei->io_tree.track_uptodate = 1;
+ ei->io_failure_tree.track_uptodate = 1;
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);