summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c109
-rw-r--r--fs/btrfs/ctree.c60
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c35
-rw-r--r--fs/btrfs/extent-tree.c11
-rw-r--r--fs/btrfs/extent_io.c14
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/free-space-cache.c145
-rw-r--r--fs/btrfs/inode.c61
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/tree-log.c6
-rw-r--r--fs/btrfs/volumes.c95
-rw-r--r--fs/btrfs/volumes.h3
14 files changed, 320 insertions, 247 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8f7d1237b7a0..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,61 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct ulist *parents, int level,
- struct btrfs_key *key, u64 time_seq,
+ struct btrfs_key *key_for_search, u64 time_seq,
u64 wanted_disk_byte,
const u64 *extent_item_pos)
{
- int ret;
- int slot = path->slots[level];
- struct extent_buffer *eb = path->nodes[level];
+ int ret = 0;
+ int slot;
+ struct extent_buffer *eb;
+ struct btrfs_key key;
struct btrfs_file_extent_item *fi;
struct extent_inode_elem *eie = NULL;
u64 disk_byte;
- u64 wanted_objectid = key->objectid;
-add_parent:
- if (level == 0 && extent_item_pos) {
- fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
+ if (level != 0) {
+ eb = path->nodes[level];
+ ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
if (ret < 0)
return ret;
- }
- ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
- if (ret < 0)
- return ret;
-
- if (level != 0)
return 0;
+ }
/*
- * if the current leaf is full with EXTENT_DATA items, we must
- * check the next one if that holds a reference as well.
- * ref->count cannot be used to skip this check.
- * repeat this until we don't find any additional EXTENT_DATA items.
+ * We normally enter this function with the path already pointing to
+ * the first item to check. But sometimes, we may enter it with
+ * slot==nritems. In that case, go to the next leaf before we continue.
*/
- while (1) {
- eie = NULL;
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
ret = btrfs_next_old_leaf(root, path, time_seq);
- if (ret < 0)
- return ret;
- if (ret)
- return 0;
+ while (!ret) {
eb = path->nodes[0];
- for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
- btrfs_item_key_to_cpu(eb, key, slot);
- if (key->objectid != wanted_objectid ||
- key->type != BTRFS_EXTENT_DATA_KEY)
- return 0;
- fi = btrfs_item_ptr(eb, slot,
- struct btrfs_file_extent_item);
- disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
- if (disk_byte == wanted_disk_byte)
- goto add_parent;
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+
+ if (key.objectid != key_for_search->objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+ if (disk_byte == wanted_disk_byte) {
+ eie = NULL;
+ if (extent_item_pos) {
+ ret = check_extent_in_eb(&key, eb, fi,
+ *extent_item_pos,
+ &eie);
+ if (ret < 0)
+ break;
+ }
+ if (!ret) {
+ ret = ulist_add(parents, eb->start,
+ (unsigned long)eie, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!extent_item_pos) {
+ ret = btrfs_next_old_leaf(root, path,
+ time_seq);
+ continue;
+ }
+ }
}
+ ret = btrfs_next_old_item(root, path, time_seq);
}
- return 0;
+ if (ret > 0)
+ ret = 0;
+ return ret;
}
/*
@@ -250,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path;
struct btrfs_root *root;
struct btrfs_key root_key;
- struct btrfs_key key = {0};
struct extent_buffer *eb;
int ret = 0;
int root_level;
@@ -289,17 +301,19 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
eb = path->nodes[level];
- if (!eb) {
- WARN_ON(1);
- ret = 1;
- goto out;
+ while (!eb) {
+ if (!level) {
+ WARN_ON(1);
+ ret = 1;
+ goto out;
+ }
+ level--;
+ eb = path->nodes[level];
}
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
-
- ret = add_all_parents(root, path, parents, level, &key, time_seq,
- ref->wanted_disk_byte, extent_item_pos);
+ ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
+ time_seq, ref->wanted_disk_byte,
+ extent_item_pos);
out:
btrfs_free_path(path);
return ret;
@@ -825,6 +839,7 @@ again:
}
ret = __add_delayed_refs(head, delayed_ref_seq,
&prefs_delayed);
+ mutex_unlock(&head->mutex);
if (ret) {
spin_unlock(&delayed_refs->lock);
goto out;
@@ -918,8 +933,6 @@ again:
}
out:
- if (head)
- mutex_unlock(&head->mutex);
btrfs_free_path(path);
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff0..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
if (!looped && !tm)
return 0;
/*
- * we must have key remove operations in the log before the
- * replace operation.
+ * if there are no tree operation for the oldest root, we simply
+ * return it. this should only happen if that (old) root is at
+ * level 0.
*/
- BUG_ON(!tm);
+ if (!tm)
+ break;
+ /*
+ * if there's an operation that's not a root replacement, we
+ * found the oldest version of our root. normally, we'll find a
+ * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+ */
if (tm->op != MOD_LOG_ROOT_REPLACE)
break;
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
tm->generation);
break;
case MOD_LOG_KEY_ADD:
- if (tm->slot != n - 1) {
- o_dst = btrfs_node_key_ptr_offset(tm->slot);
- o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
- memmove_extent_buffer(eb, o_dst, o_src, p_size);
- }
+ /* if a move operation is needed it's in the log */
n--;
break;
case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
}
tm = tree_mod_log_search(root->fs_info, logical, time_seq);
- /*
- * there was an item in the log when __tree_mod_log_oldest_root
- * returned. this one must not go away, because the time_seq passed to
- * us must be blocking its removal.
- */
- BUG_ON(!tm);
-
if (old_root)
- eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
- root->nodesize);
+ eb = alloc_dummy_extent_buffer(logical, root->nodesize);
else
eb = btrfs_clone_extent_buffer(root->node);
btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
- __tree_mod_log_rewind(eb, time_seq, tm);
+ if (tm)
+ __tree_mod_log_rewind(eb, time_seq, tm);
+ else
+ WARN_ON(btrfs_header_level(eb) != 0);
extent_buffer_get(eb);
return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
static void insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
- int slot, int level, int tree_mod_log)
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
BUG_ON(slot > nritems);
BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
if (slot != nritems) {
- if (tree_mod_log && level)
+ if (level)
tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
slot, nritems - slot);
memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
btrfs_node_key_ptr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_key_ptr));
}
- if (tree_mod_log && level) {
+ if (level) {
ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(split);
insert_ptr(trans, root, path, &disk_key, split->start,
- path->slots[level + 1] + 1, level + 1, 1);
+ path->slots[level + 1] + 1, level + 1);
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1, 0);
+ path->slots[1] + 1, 1);
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1, 0);
+ path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
} else {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1], 1, 0);
+ path->slots[1], 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
+ if (!ret && time_seq) {
+ /*
+ * If we don't get the lock, we may be racing
+ * with push_leaf_left, holding that lock while
+ * itself waiting for the leaf we've currently
+ * locked. To solve this situation, we give up
+ * on our lock and cycle.
+ */
+ btrfs_release_path(path);
+ cond_resched();
+ goto again;
+ }
if (!ret) {
btrfs_set_path_blocking(path);
btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8b73b2d4deb7..fa5c45b39075 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2755,13 +2755,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
-static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+static inline int btrfs_next_old_item(struct btrfs_root *root,
+ struct btrfs_path *p, u64 time_seq)
{
++p->slots[0];
if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
- return btrfs_next_leaf(root, p);
+ return btrfs_next_old_leaf(root, p, time_seq);
return 0;
}
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+ return btrfs_next_old_item(root, p, 0);
+}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e1890b1d3075..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
BTRFS_CSUM_TREE_OBJECTID, csum_root);
if (ret)
goto recovery_tree_root;
-
csum_root->track_dirty = 1;
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
+ ret = btrfs_recover_balance(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to recover balance\n");
+ goto fail_block_groups;
+ }
+
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
goto fail_trans_kthread;
}
- if (!(sb->s_flags & MS_RDONLY)) {
- down_read(&fs_info->cleanup_work_sem);
- err = btrfs_orphan_cleanup(fs_info->fs_root);
- if (!err)
- err = btrfs_orphan_cleanup(fs_info->tree_root);
- up_read(&fs_info->cleanup_work_sem);
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
- if (!err)
- err = btrfs_recover_balance(fs_info->tree_root);
+ down_read(&fs_info->cleanup_work_sem);
+ if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+ (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+ up_read(&fs_info->cleanup_work_sem);
+ close_ctree(tree_root);
+ return ret;
+ }
+ up_read(&fs_info->cleanup_work_sem);
- if (err) {
- close_ctree(tree_root);
- return err;
- }
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret) {
+ printk(KERN_WARNING "btrfs: failed to resume balance\n");
+ close_ctree(tree_root);
+ return ret;
}
return 0;
@@ -3426,6 +3434,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(ref);
+ spin_lock(&delayed_refs->lock);
continue;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
return count;
}
-
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
- unsigned long num_refs)
+ unsigned long num_refs,
+ struct list_head *first_seq)
{
- struct list_head *first_seq = delayed_refs->seq_head.next;
-
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
+ struct list_head *first_seq = NULL;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
*/
consider_waiting = 1;
num_refs = delayed_refs->num_entries;
+ first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
- wait_for_more_refs(delayed_refs, num_refs);
+ wait_for_more_refs(delayed_refs,
+ num_refs, first_seq);
/*
* after waiting, things have changed. we
* dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb348..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
+ struct inode *inode = mapping->host;
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
int scanned = 0;
int tag;
+ /*
+ * We have to hold onto the inode so that ordered extents can do their
+ * work when the IO finishes. The alternative to this is failing to add
+ * an ordered extent if the igrab() fails there and that is a huge pain
+ * to deal with, so instead just hold onto the inode throughout the
+ * writepages operation. If it fails here we are freeing up the inode
+ * anyway and we'd rather not waste our time writing out stuff that is
+ * going to be truncated anyway.
+ */
+ if (!igrab(inode))
+ return 0;
+
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
index = 0;
goto retry;
}
+ btrfs_add_delayed_iput(inode);
return ret;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
loff_t *ppos, size_t count, size_t ocount)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = fdentry(file)->d_inode;
struct iov_iter i;
ssize_t written;
ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
count, ocount);
- /*
- * the generic O_DIRECT will update in-memory i_size after the
- * DIOs are done. But our endio handlers that update the on
- * disk i_size never update past the in memory i_size. So we
- * need one more update here to catch any additions to the
- * file
- */
- if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
- mark_inode_dirty(inode);
- }
-
if (written < 0 || written == count)
return written;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
/*
- * XXX - this can go away after a few releases.
- *
- * since the only user of btrfs_remove_free_space is the tree logging
- * stuff, and the only way to test that is under crash conditions, we
- * want to have this debug stuff here just in case somethings not
- * working. Search the bitmap for the space we are trying to use to
- * make sure its actually there. If its not there then we need to stop
- * because something has gone wrong.
+ * We need to search for bits in this bitmap. We could only cover some
+ * of the extent in this bitmap thanks to how we add space, so we need
+ * to search for as much as it as we can and clear that amount, and then
+ * go searching for the next bit.
*/
search_start = *offset;
- search_bytes = *bytes;
+ search_bytes = ctl->unit;
search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
BUG_ON(ret < 0 || search_start != *offset);
- if (*offset > bitmap_info->offset && *offset + *bytes > end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
- *bytes -= end - *offset + 1;
- *offset = end + 1;
- } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
- *bytes = 0;
- }
+ /* We may have found more bits than what we need */
+ search_bytes = min(search_bytes, *bytes);
+
+ /* Cannot clear past the end of the bitmap */
+ search_bytes = min(search_bytes, end - search_start + 1);
+
+ bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+ *offset += search_bytes;
+ *bytes -= search_bytes;
if (*bytes) {
struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
* everything over again.
*/
search_start = *offset;
- search_bytes = *bytes;
+ search_bytes = ctl->unit;
ret = search_bitmap(ctl, bitmap_info, &search_start,
&search_bytes);
if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
- struct btrfs_free_space *next_info = NULL;
int ret = 0;
spin_lock(&ctl->tree_lock);
again:
+ if (!bytes)
+ goto out_lock;
+
info = tree_search_offset(ctl, offset, 0, 0);
if (!info) {
/*
@@ -1905,88 +1904,48 @@ again:
}
}
- if (info->bytes < bytes && rb_next(&info->offset_index)) {
- u64 end;
- next_info = rb_entry(rb_next(&info->offset_index),
- struct btrfs_free_space,
- offset_index);
-
- if (next_info->bitmap)
- end = next_info->offset +
- BITS_PER_BITMAP * ctl->unit - 1;
- else
- end = next_info->offset + next_info->bytes;
-
- if (next_info->bytes < bytes ||
- next_info->offset > offset || offset > end) {
- printk(KERN_CRIT "Found free space at %llu, size %llu,"
- " trying to use %llu\n",
- (unsigned long long)info->offset,
- (unsigned long long)info->bytes,
- (unsigned long long)bytes);
- WARN_ON(1);
- ret = -EINVAL;
- goto out_lock;
- }
-
- info = next_info;
- }
-
- if (info->bytes == bytes) {
+ if (!info->bitmap) {
unlink_free_space(ctl, info);
- if (info->bitmap) {
- kfree(info->bitmap);
- ctl->total_bitmaps--;
- }
- kmem_cache_free(btrfs_free_space_cachep, info);
- ret = 0;
- goto out_lock;
- }
-
- if (!info->bitmap && info->offset == offset) {
- unlink_free_space(ctl, info);
- info->offset += bytes;
- info->bytes -= bytes;
- ret = link_free_space(ctl, info);
- WARN_ON(ret);
- goto out_lock;
- }
+ if (offset == info->offset) {
+ u64 to_free = min(bytes, info->bytes);
+
+ info->bytes -= to_free;
+ info->offset += to_free;
+ if (info->bytes) {
+ ret = link_free_space(ctl, info);
+ WARN_ON(ret);
+ } else {
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ }
- if (!info->bitmap && info->offset <= offset &&
- info->offset + info->bytes >= offset + bytes) {
- u64 old_start = info->offset;
- /*
- * we're freeing space in the middle of the info,
- * this can happen during tree log replay
- *
- * first unlink the old info and then
- * insert it again after the hole we're creating
- */
- unlink_free_space(ctl, info);
- if (offset + bytes < info->offset + info->bytes) {
- u64 old_end = info->offset + info->bytes;
+ offset += to_free;
+ bytes -= to_free;
+ goto again;
+ } else {
+ u64 old_end = info->bytes + info->offset;
- info->offset = offset + bytes;
- info->bytes = old_end - info->offset;
+ info->bytes = offset - info->offset;
ret = link_free_space(ctl, info);
WARN_ON(ret);
if (ret)
goto out_lock;
- } else {
- /* the hole we're creating ends at the end
- * of the info struct, just free the info
- */
- kmem_cache_free(btrfs_free_space_cachep, info);
- }
- spin_unlock(&ctl->tree_lock);
- /* step two, insert a new info struct to cover
- * anything before the hole
- */
- ret = btrfs_add_free_space(block_group, old_start,
- offset - old_start);
- WARN_ON(ret); /* -ENOMEM */
- goto out;
+ /* Not enough bytes in this entry to satisfy us */
+ if (old_end < offset + bytes) {
+ bytes -= old_end - offset;
+ offset = old_end;
+ goto again;
+ } else if (old_end == offset + bytes) {
+ /* all done */
+ goto out_lock;
+ }
+ spin_unlock(&ctl->tree_lock);
+
+ ret = btrfs_add_free_space(block_group, offset + bytes,
+ old_end - (offset + bytes));
+ WARN_ON(ret);
+ goto out;
+ }
}
ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a4f02501da40..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -987,7 +987,7 @@ static noinline void async_cow_start(struct btrfs_work *work)
async_cow->start, async_cow->end, async_cow,
&num_added);
if (num_added == 0) {
- iput(async_cow->inode);
+ btrfs_add_delayed_iput(async_cow->inode);
async_cow->inode = NULL;
}
}
@@ -1023,7 +1023,7 @@ static noinline void async_cow_free(struct btrfs_work *work)
struct async_cow *async_cow;
async_cow = container_of(work, struct async_cow, work);
if (async_cow->inode)
- iput(async_cow->inode);
+ btrfs_add_delayed_iput(async_cow->inode);
kfree(async_cow);
}
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (root->fs_info->log_root_recovering) {
- BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags));
goto no_delete;
}
@@ -5876,8 +5876,17 @@ map:
bh_result->b_size = len;
bh_result->b_bdev = em->bdev;
set_buffer_mapped(bh_result);
- if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
+ if (create) {
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ set_buffer_new(bh_result);
+
+ /*
+ * Need to update the i_size under the extent lock so buffered
+ * readers will get the updated i_size when we unlock.
+ */
+ if (start + len > i_size_read(inode))
+ i_size_write(inode, start + len);
+ }
free_extent_map(em);
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
*/
ordered = btrfs_lookup_ordered_range(inode, lockstart,
lockend - lockstart + 1);
- if (!ordered)
+
+ /*
+ * We need to make sure there are no buffered pages in this
+ * range either, we could have raced between the invalidate in
+ * generic_file_direct_write and locking the extent. The
+ * invalidate needs to happen so that reads after a write do not
+ * get stale data.
+ */
+ if (!ordered && (!writing ||
+ !test_range_bit(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, EXTENT_UPTODATE, 0,
+ cached_state)))
break;
+
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
+
+ if (ordered) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ } else {
+ /* Screw you mmap */
+ ret = filemap_write_and_wait_range(file->f_mapping,
+ lockstart,
+ lockend);
+ if (ret)
+ goto out;
+
+ /*
+ * If we found a page that couldn't be invalidated just
+ * fall back to buffered.
+ */
+ ret = invalidate_inode_pages2_range(file->f_mapping,
+ lockstart >> PAGE_CACHE_SHIFT,
+ lockend >> PAGE_CACHE_SHIFT);
+ if (ret) {
+ if (ret == -EBUSY)
+ ret = 0;
+ goto out;
+ }
+ }
+
cond_resched();
}
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069e..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
+ ret = btrfs_resume_balance_async(fs_info);
+ if (ret)
+ goto restore;
+
sb->s_flags &= ~MS_RDONLY;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
kfree(name);
iput(inode);
+
+ btrfs_run_delayed_items(trans, root);
return ret;
}
@@ -895,6 +897,7 @@ again:
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
+ btrfs_run_delayed_items(trans, root);
}
kfree(victim_name);
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
BUG_ON(ret);
+
+ btrfs_run_delayed_items(trans, root);
+
kfree(name);
iput(inode);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b807..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
static int balance_kthread(void *data)
{
- struct btrfs_balance_control *bctl =
- (struct btrfs_balance_control *)data;
- struct btrfs_fs_info *fs_info = bctl->fs_info;
+ struct btrfs_fs_info *fs_info = data;
int ret = 0;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
- set_balance_control(bctl);
-
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
- printk(KERN_INFO "btrfs: force skipping balance\n");
- } else {
+ if (fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: continuing balance\n");
- ret = btrfs_balance(bctl, NULL);
+ ret = btrfs_balance(fs_info->balance_ctl, NULL);
}
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
+
return ret;
}
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
struct task_struct *tsk;
+
+ spin_lock(&fs_info->balance_lock);
+ if (!fs_info->balance_ctl) {
+ spin_unlock(&fs_info->balance_lock);
+ return 0;
+ }
+ spin_unlock(&fs_info->balance_lock);
+
+ if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+ printk(KERN_INFO "btrfs: force skipping balance\n");
+ return 0;
+ }
+
+ tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+ if (IS_ERR(tsk))
+ return PTR_ERR(tsk);
+
+ return 0;
+}
+
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
struct btrfs_balance_control *bctl;
struct btrfs_balance_item *item;
struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
if (!path)
return -ENOMEM;
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out;
- }
-
key.objectid = BTRFS_BALANCE_OBJECTID;
key.type = BTRFS_BALANCE_ITEM_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
- goto out_bctl;
+ goto out;
if (ret > 0) { /* ret = -ENOENT; */
ret = 0;
- goto out_bctl;
+ goto out;
+ }
+
+ bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ if (!bctl) {
+ ret = -ENOMEM;
+ goto out;
}
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
- bctl->fs_info = tree_root->fs_info;
- bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+ bctl->fs_info = fs_info;
+ bctl->flags = btrfs_balance_flags(leaf, item);
+ bctl->flags |= BTRFS_BALANCE_RESUME;
btrfs_balance_data(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
- tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
- if (IS_ERR(tsk))
- ret = PTR_ERR(tsk);
- else
- goto out;
+ mutex_lock(&fs_info->volume_mutex);
+ mutex_lock(&fs_info->balance_mutex);
-out_bctl:
- kfree(bctl);
+ set_balance_control(bctl);
+
+ mutex_unlock(&fs_info->balance_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
out:
btrfs_free_path(path);
return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
- if (bio->bi_rw & WRITE)
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_WRITE_ERRS);
- else
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_READ_ERRS);
- if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
- btrfs_dev_stat_inc(dev,
- BTRFS_DEV_STAT_FLUSH_ERRS);
- btrfs_dev_stat_print_on_error(dev);
+ if (dev->bdev) {
+ if (bio->bi_rw & WRITE)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
+ else
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_READ_ERRS);
+ if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+ btrfs_dev_stat_inc(dev,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
+ btrfs_dev_stat_print_on_error(dev);
+ }
}
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76b..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);