summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-06 00:06:25 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-06 00:06:25 +0400
commit5eecb9cc9029aef3c308fc94149c4a3065d40d9a (patch)
treeb5a7f7bf298ef0efc4ff5577b9a6a4dcec3db028 /fs
parent62ad64498a2e72eb9fb70ab80e20ecc9cac8d40b (diff)
parentb6305567e7d31b0bec1b8cb9ec0cadd7f7086f5f (diff)
downloadlinux-5eecb9cc9029aef3c308fc94149c4a3065d40d9a.tar.xz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "I held off on my rc5 pull because I hit an oops during log recovery after a crash. I wanted to make sure it wasn't a regression because we have some logging fixes in here. It turns out that a commit during the merge window just made it much more likely to trigger directory logging instead of full commits, which exposed an old bug. The new backref walking code got some additional fixes. This should be the final set of them. Josef fixed up a corner where our O_DIRECT writes and buffered reads could expose old file contents (not stale, just not the most recent). He and Liu Bo fixed crashes during tree log recover as well. Ilya fixed errors while we resume disk balancing operations on readonly mounts." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: run delayed directory updates during log replay Btrfs: hold a ref on the inode during writepages Btrfs: fix tree log remove space corner case Btrfs: fix wrong check during log recovery Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS Btrfs: resume balance on rw (re)mounts properly Btrfs: restore restriper state on all mounts Btrfs: fix dio write vs buffered read race Btrfs: don't count I/O statistic read errors for missing devices Btrfs: resolve tree mod log locking issue in btrfs_next_leaf Btrfs: fix tree mod log rewind of ADD operations Btrfs: leave critical region in btrfs_find_all_roots as soon as possible Btrfs: always put insert_ptr modifications into the tree mod log Btrfs: fix tree mod log for root replacements at leaf level Btrfs: support root level changes in __resolve_indirect_ref Btrfs: avoid waiting for delayed refs when we must not
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/backref.c15
-rw-r--r--fs/btrfs/ctree.c60
-rw-r--r--fs/btrfs/disk-io.c34
-rw-r--r--fs/btrfs/extent-tree.c11
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/free-space-cache.c145
-rw-r--r--fs/btrfs/inode.c57
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/tree-log.c6
-rw-r--r--fs/btrfs/volumes.c95
-rw-r--r--fs/btrfs/volumes.h3
13 files changed, 258 insertions, 201 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7301cdb4b2cb..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
eb = path->nodes[level];
- if (!eb) {
- WARN_ON(1);
- ret = 1;
- goto out;
+ while (!eb) {
+ if (!level) {
+ WARN_ON(1);
+ ret = 1;
+ goto out;
+ }
+ level--;
+ eb = path->nodes[level];
}
ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ again:
}
ret = __add_delayed_refs(head, delayed_ref_seq,
&prefs_delayed);
+ mutex_unlock(&head->mutex);
if (ret) {
spin_unlock(&delayed_refs->lock);
goto out;
@@ -928,8 +933,6 @@ again:
}
out:
- if (head)
- mutex_unlock(&head->mutex);
btrfs_free_path(path);
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff0..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
if (!looped && !tm)
return 0;
/*
- * we must have key remove operations in the log before the
- * replace operation.
+ * if there are no tree operation for the oldest root, we simply
+ * return it. this should only happen if that (old) root is at
+ * level 0.
*/
- BUG_ON(!tm);
+ if (!tm)
+ break;
+ /*
+ * if there's an operation that's not a root replacement, we
+ * found the oldest version of our root. normally, we'll find a
+ * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+ */
if (tm->op != MOD_LOG_ROOT_REPLACE)
break;
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
tm->generation);
break;
case MOD_LOG_KEY_ADD:
- if (tm->slot != n - 1) {
- o_dst = btrfs_node_key_ptr_offset(tm->slot);
- o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
- memmove_extent_buffer(eb, o_dst, o_src, p_size);
- }
+ /* if a move operation is needed it's in the log */
n--;
break;
case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
}
tm = tree_mod_log_search(root->fs_info, logical, time_seq);
- /*
- * there was an item in the log when __tree_mod_log_oldest_root
- * returned. this one must not go away, because the time_seq passed to
- * us must be blocking its removal.
- */
- BUG_ON(!tm);
-
if (old_root)
- eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
- root->nodesize);
+ eb = alloc_dummy_extent_buffer(logical, root->nodesize);
else
eb = btrfs_clone_extent_buffer(root->node);
btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
- __tree_mod_log_rewind(eb, time_seq, tm);
+ if (tm)
+ __tree_mod_log_rewind(eb, time_seq, tm);
+ else
+ WARN_ON(btrfs_header_level(eb) != 0);
extent_buffer_get(eb);
return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
static void insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
- int slot, int level, int tree_mod_log)
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
BUG_ON(slot > nritems);
BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
if (slot != nritems) {
- if (tree_mod_log && level)
+ if (level)
tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
slot, nritems - slot);
memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
btrfs_node_key_ptr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_key_ptr));
}
- if (tree_mod_log && level) {
+ if (level) {
ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(split);
insert_ptr(trans, root, path, &disk_key, split->start,
- path->slots[level + 1] + 1, level + 1, 1);
+ path->slots[level + 1] + 1, level + 1);
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1, 0);
+ path->slots[1] + 1, 1);
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1, 0);
+ path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
} else {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1], 1, 0);
+ path->slots[1], 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
+ if (!ret && time_seq) {
+ /*
+ * If we don't get the lock, we may be racing
+ * with push_leaf_left, holding that lock while
+ * itself waiting for the leaf we've currently
+ * locked. To solve this situation, we give up
+ * on our lock and cycle.
+ */
+ btrfs_release_path(path);
+ cond_resched();
+ goto again;
+ }
if (!ret) {
btrfs_set_path_blocking(path);
btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b845ff4af99..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
BTRFS_CSUM_TREE_OBJECTID, csum_root);
if (ret)
goto recovery_tree_root;
-
csum_root->track_dirty = 1;
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
+ ret = btrfs_recover_balance(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to recover balance\n");
+ goto fail_block_groups;
+ }
+
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
goto fail_trans_kthread;
}
- if (!(sb->s_flags & MS_RDONLY)) {
- down_read(&fs_info->cleanup_work_sem);
- err = btrfs_orphan_cleanup(fs_info->fs_root);
- if (!err)
- err = btrfs_orphan_cleanup(fs_info->tree_root);
- up_read(&fs_info->cleanup_work_sem);
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
- if (!err)
- err = btrfs_recover_balance(fs_info->tree_root);
+ down_read(&fs_info->cleanup_work_sem);
+ if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+ (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+ up_read(&fs_info->cleanup_work_sem);
+ close_ctree(tree_root);
+ return ret;
+ }
+ up_read(&fs_info->cleanup_work_sem);
- if (err) {
- close_ctree(tree_root);
- return err;
- }
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to resume balance\n");
+ close_ctree(tree_root);
+ return ret;
}
return 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
return count;
}
-
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
- unsigned long num_refs)
+ unsigned long num_refs,
+ struct list_head *first_seq)
{
- struct list_head *first_seq = delayed_refs->seq_head.next;
-
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
+ struct list_head *first_seq = NULL;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
*/
consider_waiting = 1;
num_refs = delayed_refs->num_entries;
+ first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
- wait_for_more_refs(delayed_refs, num_refs);
+ wait_for_more_refs(delayed_refs,
+ num_refs, first_seq);
/*
* after waiting, things have changed. we
* dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb348..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
+ struct inode *inode = mapping->host;
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
int scanned = 0;
int tag;
+ /*
+ * We have to hold onto the inode so that ordered extents can do their
+ * work when the IO finishes. The alternative to this is failing to add
+ * an ordered extent if the igrab() fails there and that is a huge pain
+ * to deal with, so instead just hold onto the inode throughout the
+ * writepages operation. If it fails here we are freeing up the inode
+ * anyway and we'd rather not waste our time writing out stuff that is
+ * going to be truncated anyway.
+ */
+ if (!igrab(inode))
+ return 0;
+
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
index = 0;
goto retry;
}
+ btrfs_add_delayed_iput(inode);
return ret;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
loff_t *ppos, size_t count, size_t ocount)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = fdentry(file)->d_inode;
struct iov_iter i;
ssize_t written;
ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
count, ocount);
- /*
- * the generic O_DIRECT will update in-memory i_size after the
- * DIOs are done. But our endio handlers that update the on
- * disk i_size never update past the in memory i_size. So we
- * need one more update here to catch any additions to the
- * file
- */
- if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
- mark_inode_dirty(inode);
- }
-
if (written < 0 || written == count)
return written;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
/*
- * XXX - this can go away after a few releases.
- *
- * since the only user of btrfs_remove_free_space is the tree logging
- * stuff, and the only way to test that is under crash conditions, we
- * want to have this debug stuff here just in case somethings not
- * working. Search the bitmap for the space we are trying to use to
- * make sure its actually there. If its not there then we need to stop
- * because something has gone wrong.
+ * We need to search for bits in this bitmap. We could only cover some
+ * of the extent in this bitmap thanks to how we add space, so we need
+ * to search for as much as it as we can and clear that amount, and then
+ * go searching for the next bit.
*/
search_start = *offset;
- search_bytes = *bytes;
+ search_bytes = ctl->unit;
search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
BUG_ON(ret < 0 || search_start != *offset);
- if (*offset > bitmap_info->offset && *offset + *bytes > end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
- *bytes -= end - *offset + 1;
- *offset = end + 1;
- } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
- *bytes = 0;
- }
+ /* We may have found more bits than what we need */
+ search_bytes = min(search_bytes, *bytes);
+
+ /* Cannot clear past the end of the bitmap */
+ search_bytes = min(search_bytes, end - search_start + 1);
+
+ bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+ *offset += search_bytes;
+ *bytes -= search_bytes;
if (*bytes) {
struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
* everything over again.
*/
search_start = *offset;
- search_bytes = *bytes;
+ search_bytes = ctl->unit;
ret = search_bitmap(ctl, bitmap_info, &search_start,
&search_bytes);
if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
- struct btrfs_free_space *next_info = NULL;
int ret = 0;
spin_lock(&ctl->tree_lock);
again:
+ if (!bytes)
+ goto out_lock;
+
info = tree_search_offset(ctl, offset, 0, 0);
if (!info) {
/*
@@ -1905,88 +1904,48 @@ again:
}
}
- if (info->bytes < bytes && rb_next(&info->offset_index)) {
- u64 end;
- next_info = rb_entry(rb_next(&info->offset_index),
- struct btrfs_free_space,
- offset_index);
-
- if (next_info->bitmap)
- end = next_info->offset +
- BITS_PER_BITMAP * ctl->unit - 1;
- else
- end = next_info->offset + next_info->bytes;
-
- if (next_info->bytes < bytes ||
- next_info->offset > offset || offset > end) {
- printk(KERN_CRIT "Found free space at %llu, size %llu,"
- " trying to use %llu\n",
- (unsigned long long)info->offset,
- (unsigned long long)info->bytes,
- (unsigned long long)bytes);
- WARN_ON(1);
- ret = -EINVAL;
- goto out_lock;
- }
-
- info = next_info;
- }
-
- if (info->bytes == bytes) {
+ if (!info->bitmap) {
unlink_free_space(ctl, info);
- if (info->bitmap) {
- kfree(info->bitmap);
- ctl->total_bitmaps--;
- }
- kmem_cache_free(btrfs_free_space_cachep, info);
- ret = 0;
- goto out_lock;
- }
-
- if (!info->bitmap && info->offset == offset) {
- unlink_free_space(ctl, info);
- info->offset += bytes;
- info->bytes -= bytes;
- ret = link_free_space(ctl, info);
- WARN_ON(ret);
- goto out_lock;
- }
+ if (offset == info->offset) {
+ u64 to_free = min(bytes, info->bytes);
+
+ info->bytes -= to_free;
+ info->offset += to_free;
+ if (info->bytes) {
+ ret = link_free_space(ctl, info);
+ WARN_ON(ret);
+ } else {
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ }
- if (!info->bitmap && info->offset <= offset &&
- info->offset + info->bytes >= offset + bytes) {
- u64 old_start = info->offset;
- /*
- * we're freeing space in the middle of the info,
- * this can happen during tree log replay
- *
- * first unlink the old info and then
- * insert it again after the hole we're creating
- */
- unlink_free_space(ctl, info);
- if (offset + bytes < info->offset + info->bytes) {
- u64 old_end = info->offset + info->bytes;
+ offset += to_free;
+ bytes -= to_free;
+ goto again;
+ } else {
+ u64 old_end = info->bytes + info->offset;
- info->offset = offset + bytes;
- info->bytes = old_end - info->offset;
+ info->bytes = offset - info->offset;
ret = link_free_space(ctl, info);
WARN_ON(ret);
if (ret)
goto out_lock;
- } else {
- /* the hole we're creating ends at the end
- * of the info struct, just free the info
- */
- kmem_cache_free(btrfs_free_space_cachep, info);
- }
- spin_unlock(&ctl->tree_lock);
- /* step two, insert a new info struct to cover
- * anything before the hole
- */
- ret = btrfs_add_free_space(block_group, old_start,
- offset - old_start);
- WARN_ON(ret); /* -ENOMEM */
- goto out;
+ /* Not enough bytes in this entry to satisfy us */
+ if (old_end < offset + bytes) {
+ bytes -= old_end - offset;
+ offset = old_end;
+ goto again;
+ } else if (old_end == offset + bytes) {
+ /* all done */
+ goto out_lock;
+ }
+ spin_unlock(&ctl->tree_lock);
+
+ ret = btrfs_add_free_space(block_group, offset + bytes,
+ old_end - (offset + bytes));
+ WARN_ON(ret);
+ goto out;
+ }
}
ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d8bb0dbc4941..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (root->fs_info->log_root_recovering) {
- BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags));
goto no_delete;
}
@@ -5876,8 +5876,17 @@ map:
bh_result->b_size = len;
bh_result->b_bdev = em->bdev;
set_buffer_mapped(bh_result);
- if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
+ if (create) {
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ set_buffer_new(bh_result);
+
+ /*
+ * Need to update the i_size under the extent lock so buffered
+ * readers will get the updated i_size when we unlock.
+ */
+ if (start + len > i_size_read(inode))
+ i_size_write(inode, start + len);
+ }
free_extent_map(em);
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
*/
ordered = btrfs_lookup_ordered_range(inode, lockstart,
lockend - lockstart + 1);
- if (!ordered)
+
+ /*
+ * We need to make sure there are no buffered pages in this
+ * range either, we could have raced between the invalidate in
+ * generic_file_direct_write and locking the extent. The
+ * invalidate needs to happen so that reads after a write do not
+ * get stale data.
+ */
+ if (!ordered && (!writing ||
+ !test_range_bit(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, EXTENT_UPTODATE, 0,
+ cached_state)))
break;
+
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
+
+ if (ordered) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ } else {
+ /* Screw you mmap */
+ ret = filemap_write_and_wait_range(file->f_mapping,
+ lockstart,
+ lockend);
+ if (ret)
+ goto out;
+
+ /*
+ * If we found a page that couldn't be invalidated just
+ * fall back to buffered.
+ */
+ ret = invalidate_inode_pages2_range(file->f_mapping,
+ lockstart >> PAGE_CACHE_SHIFT,
+ lockend >> PAGE_CACHE_SHIFT);
+ if (ret) {
+ if (ret == -EBUSY)
+ ret = 0;
+ goto out;
+ }
+ }
+
cond_resched();
}
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069e..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret)
+ goto restore;
+
sb->s_flags &= ~MS_RDONLY;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
kfree(name);
iput(inode);
+
+ btrfs_run_delayed_items(trans, root);
return ret;
}
@@ -895,6 +897,7 @@ again:
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
+ btrfs_run_delayed_items(trans, root);
}
kfree(victim_name);
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
BUG_ON(ret);
+
+ btrfs_run_delayed_items(trans, root);
+
kfree(name);
iput(inode);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b807..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
static int balance_kthread(void *data)
{
- struct btrfs_balance_control *bctl =
- (struct btrfs_balance_control *)data;
- struct btrfs_fs_info *fs_info = bctl->fs_info;
+ struct btrfs_fs_info *fs_info = data;
int ret = 0;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
- set_balance_control(bctl);
-
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
- printk(KERN_INFO "btrfs: force skipping balance\n");
- } else {
+ if (fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: continuing balance\n");
- ret = btrfs_balance(bctl, NULL);
+ ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
+
return ret;
}
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
struct task_struct *tsk;
+
+ spin_lock(&fs_info->balance_lock);
+ if (!fs_info->balance_ctl) {
+ spin_unlock(&fs_info->balance_lock);
+ return 0;
+ }
+ spin_unlock(&fs_info->balance_lock);
+
+ if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ printk(KERN_INFO "btrfs: force skipping balance\n");
+ return 0;
+ }
+
+ tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+ if (IS_ERR(tsk))
+ return PTR_ERR(tsk);
+
+ return 0;
+}
+
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
struct btrfs_balance_control *bctl;
struct btrfs_balance_item *item;
struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
if (!path)
return -ENOMEM;
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out;
- }
-
key.objectid = BTRFS_BALANCE_OBJECTID;
key.type = BTRFS_BALANCE_ITEM_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
- goto out_bctl;
+ goto out;
if (ret > 0) { /* ret = -ENOENT; */
ret = 0;
- goto out_bctl;
+ goto out;
+ }
+
+ bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ if (!bctl) {
+ ret = -ENOMEM;
+ goto out;
}
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
- bctl->fs_info = tree_root->fs_info;
- bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+ bctl->fs_info = fs_info;
+ bctl->flags = btrfs_balance_flags(leaf, item);
+ bctl->flags |= BTRFS_BALANCE_RESUME;
btrfs_balance_data(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
- if (IS_ERR(tsk))
- ret = PTR_ERR(tsk);
- else
- goto out;
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
-out_bctl:
- kfree(bctl);
+ set_balance_control(bctl);
+
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
- if (bio->bi_rw & WRITE)
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_WRITE_ERRS);
- else
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_READ_ERRS);
- if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_FLUSH_ERRS);
- btrfs_dev_stat_print_on_error(dev);
+ if (dev->bdev) {
+ if (bio->bi_rw & WRITE)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
+ else
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_READ_ERRS);
+ if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
+ btrfs_dev_stat_print_on_error(dev);
+ }
}
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76b..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);