summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-07-01 12:28:42 +0400
committerIngo Molnar <mingo@elte.hu>2011-07-01 12:28:46 +0400
commit10e6962765f8213941eaa1cbb9de425ff0689e2e (patch)
treec277e4ac5b2b7156152454578c39e90da5127dd8 /fs/btrfs
parentaf07ce3e77d3b24ab1d71fcc5833d41800f23b2b (diff)
parentb0af8dfdd67699e25083478c63eedef2e72ebd85 (diff)
downloadlinux-10e6962765f8213941eaa1cbb9de425ff0689e2e.tar.xz
Merge commit 'v3.0-rc5' into perf/core
Merge reason: Pick up the latest fixes. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.h16
-rw-r--r--fs/btrfs/delayed-inode.c136
-rw-r--r--fs/btrfs/delayed-inode.h6
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/free-space-cache.c9
-rw-r--r--fs/btrfs/inode.c14
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/relocation.c30
-rw-r--r--fs/btrfs/sysfs.c146
-rw-r--r--fs/btrfs/transaction.c114
-rw-r--r--fs/btrfs/tree-log.c2
12 files changed, 274 insertions, 221 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..f30ac05dbda7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
#ifndef __BTRFS_CTREE__
#define __BTRFS_CTREE__
-#include <linux/version.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
@@ -967,6 +966,12 @@ struct btrfs_fs_info {
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
+ /*
+ * the reloc mutex goes with the trans lock; it is taken
+ * during commit to protect us from the relocation code
+ */
+ struct mutex reloc_mutex;
+
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
@@ -1172,6 +1177,14 @@ struct btrfs_root {
u32 type;
u64 highest_objectid;
+
+ /* btrfs_record_root_in_trans is a multi-step process,
+ * and it can race with the balancing code. But the
+ * race window is very small and only exists the first time the
+ * root is added to each transaction. So in_trans_setup
+ * is used to tell us when more checks are required
+ */
+ unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
@@ -1181,7 +1194,6 @@ struct btrfs_root {
struct btrfs_key defrag_max;
int defrag_running;
char *name;
- int in_sysfs;
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..98c68e658a9b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
return root->fs_info->delayed_root;
}
-static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
{
- struct btrfs_delayed_node *node;
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(inode);
- int ret;
+ struct btrfs_delayed_node *node;
-again:
node = ACCESS_ONCE(btrfs_inode->delayed_node);
if (node) {
- atomic_inc(&node->refs); /* can be accessed */
+ atomic_inc(&node->refs);
return node;
}
@@ -102,8 +99,10 @@ again:
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
+ atomic_inc(&node->refs); /* can be accessed */
+ BUG_ON(btrfs_inode->delayed_node != node);
spin_unlock(&root->inode_lock);
- goto again;
+ return node;
}
btrfs_inode->delayed_node = node;
atomic_inc(&node->refs); /* can be accessed */
@@ -113,6 +112,23 @@ again:
}
spin_unlock(&root->inode_lock);
+ return NULL;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+ struct inode *inode)
+{
+ struct btrfs_delayed_node *node;
+ struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+ struct btrfs_root *root = btrfs_inode->root;
+ u64 ino = btrfs_ino(inode);
+ int ret;
+
+again:
+ node = btrfs_get_delayed_node(inode);
+ if (node)
+ return node;
+
node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
if (!node)
return ERR_PTR(-ENOMEM);
@@ -297,7 +313,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
item->data_len = data_len;
item->ins_or_del = 0;
item->bytes_reserved = 0;
- item->block_rsv = NULL;
item->delayed_node = NULL;
atomic_set(&item->refs, 1);
}
@@ -549,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
return next;
}
-static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
- struct inode *inode)
-{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
- struct btrfs_delayed_node *delayed_node;
-
- delayed_node = btrfs_inode->delayed_node;
- if (delayed_node)
- atomic_inc(&delayed_node->refs);
-
- return delayed_node;
-}
-
static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
u64 root_id)
{
@@ -593,10 +595,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret) {
+ if (!ret)
item->bytes_reserved = num_bytes;
- item->block_rsv = dst_rsv;
- }
return ret;
}
@@ -604,10 +604,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
+ struct btrfs_block_rsv *rsv;
+
if (!item->bytes_reserved)
return;
- btrfs_block_rsv_release(root, item->block_rsv,
+ rsv = &root->fs_info->global_block_rsv;
+ btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@@ -1014,6 +1017,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret = 0;
path = btrfs_alloc_path();
@@ -1021,6 +1025,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1052,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
}
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1052,6 +1060,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
+ struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
@@ -1059,6 +1068,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
return -ENOMEM;
path->leave_spinning = 1;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1078,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
+ trans->block_rsv = block_rsv;
return ret;
}
@@ -1116,6 +1129,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
+ struct btrfs_block_rsv *block_rsv;
unsigned long nr = 0;
int need_requeue = 0;
int ret;
@@ -1134,6 +1148,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
if (IS_ERR(trans))
goto free_path;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1193,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
nr = trans->blocks_used;
+ trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
__btrfs_btree_balance_dirty(root, nr);
free_path:
@@ -1222,6 +1240,13 @@ again:
return 0;
}
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+ struct btrfs_delayed_root *delayed_root;
+ delayed_root = btrfs_get_delayed_root(root);
+ WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
@@ -1382,8 +1407,7 @@ end:
int btrfs_inode_delayed_dir_index_count(struct inode *inode)
{
- struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
- int ret = 0;
+ struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
return -ENOENT;
@@ -1393,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
* a new directory index is added into the delayed node and index_cnt
* is updated now. So we needn't lock the delayed node.
*/
- if (!delayed_node->index_cnt)
+ if (!delayed_node->index_cnt) {
+ btrfs_release_delayed_node(delayed_node);
return -EINVAL;
+ }
BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
- return ret;
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
}
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
@@ -1591,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode->i_ctime.tv_nsec);
}
+int btrfs_fill_inode(struct inode *inode, u32 *rdev)
+{
+ struct btrfs_delayed_node *delayed_node;
+ struct btrfs_inode_item *inode_item;
+ struct btrfs_timespec *tspec;
+
+ delayed_node = btrfs_get_delayed_node(inode);
+ if (!delayed_node)
+ return -ENOENT;
+
+ mutex_lock(&delayed_node->mutex);
+ if (!delayed_node->inode_dirty) {
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return -ENOENT;
+ }
+
+ inode_item = &delayed_node->inode_item;
+
+ inode->i_uid = btrfs_stack_inode_uid(inode_item);
+ inode->i_gid = btrfs_stack_inode_gid(inode_item);
+ btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+ inode->i_mode = btrfs_stack_inode_mode(inode_item);
+ inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
+ inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
+ BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+ BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
+ inode->i_rdev = 0;
+ *rdev = btrfs_stack_inode_rdev(inode_item);
+ BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+
+ tspec = btrfs_inode_atime(inode_item);
+ inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_mtime(inode_item);
+ inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ tspec = btrfs_inode_ctime(inode_item);
+ inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
+ inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+ inode->i_generation = BTRFS_I(inode)->generation;
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+
+ mutex_unlock(&delayed_node->mutex);
+ btrfs_release_delayed_node(delayed_node);
+ return 0;
+}
+
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
{
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..8d27af4bd8b9 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
u64 bytes_reserved;
- struct btrfs_block_rsv *block_rsv;
struct btrfs_delayed_node *delayed_node;
atomic_t refs;
int ins_or_del;
@@ -120,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
+int btrfs_fill_inode(struct inode *inode, u32 *rdev);
/* Used for drop dead root */
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
@@ -138,4 +138,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
/* for init */
int __init btrfs_delayed_inode_init(void);
void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c6898653..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->last_trans = 0;
root->highest_objectid = 0;
root->name = NULL;
- root->in_sysfs = 0;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
return root;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- if (!root->free_ino_ctl)
- goto fail;
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
GFP_NOFS);
- if (!root->free_ino_pinned)
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
goto fail;
+ }
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);
- set_anon_super(&root->anon_super, NULL);
+ ret = set_anon_super(&root->anon_super, NULL);
+ if (ret)
+ goto fail;
if (btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
+ mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2ded51..71cd456fdb60 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0)
return 0;
- /* nothing to shrink - nothing to reclaim */
- if (root->fs_info->delalloc_bytes == 0)
- return 0;
-
max_reclaim = min(reserved, to_reclaim);
while (loops < 1024) {
@@ -4846,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- int data)
+ u64 data)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4873,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
- printk(KERN_ERR "No space info for %d\n", data);
+ printk(KERN_ERR "No space info for %llu\n", data);
return -ENOSPC;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9f985a429877..bf0d61567f3d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1893,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
- unlink_free_space(ctl, info);
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
+ if (!info->bitmap) {
+ unlink_free_space(ctl, info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ } else {
+ free_bitmap(ctl, info);
+ }
if (need_resched()) {
spin_unlock(&ctl->tree_lock);
cond_resched();
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 751ddf8fc58a..d340f63d8f07 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
int maybe_acls;
u32 rdev;
int ret;
+ bool filled = false;
+
+ ret = btrfs_fill_inode(inode, &rdev);
+ if (!ret)
+ filled = true;
path = btrfs_alloc_path();
BUG_ON(!path);
@@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode)
goto make_bad;
leaf = path->nodes[0];
+
+ if (filled)
+ goto cache_acl;
+
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
if (!leaf->map_token)
@@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
+cache_acl:
/*
* try to precache a NULL acl entry for files that don't have
* any xattrs or acls
@@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
}
btrfs_free_path(path);
- inode_item = NULL;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
@@ -3076,6 +3084,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
+ btrfs_free_path(path);
return 0;
}
@@ -4519,6 +4528,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
+ btrfs_set_inode_last_trans(trans, inode);
return inode;
fail:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d112d1f6..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
BUG_ON(ret);
+ spin_lock(&root->fs_info->trans_lock);
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
+ spin_unlock(&root->fs_info->trans_lock);
if (async_transid) {
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
int ret;
if (!root->reloc_root)
- return 0;
+ goto out;
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&reloc_root->root_key, root_item);
BUG_ON(ret);
+
+out:
return 0;
}
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;
- spin_lock(&root->fs_info->trans_lock);
+ mutex_lock(&root->fs_info->reloc_mutex);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
- spin_lock(&root->fs_info->trans_lock);
+
+ /*
+ * this serializes us with btrfs_record_root_in_trans,
+ * we have to make sure nobody is in the middle of
+ * adding their roots to the list while we are
+ * doing this splice
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
list_splice_init(&rc->reloc_roots, &reloc_roots);
- spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
while (!list_empty(&reloc_roots)) {
found = 1;
@@ -3590,17 +3600,19 @@ next:
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = rc;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
- spin_lock(&fs_info->trans_lock);
+
+ mutex_lock(&fs_info->reloc_mutex);
fs_info->reloc_ctl = NULL;
- spin_unlock(&fs_info->trans_lock);
+ mutex_unlock(&fs_info->reloc_mutex);
}
static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
#include "disk-io.h"
#include "transaction.h"
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_root *, char *);
- ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
- show, store)
-
-ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL);
-ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
- &btrfs_root_attr_blocks_used.attr,
- &btrfs_root_attr_block_limit.attr,
- NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
- struct attribute attr;
- ssize_t (*show)(struct btrfs_fs_info *, char *);
- ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
- show, store)
-
-SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL);
-SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL);
-SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
- &btrfs_super_attr_blocks_used.attr,
- &btrfs_super_attr_total_blocks.attr,
- &btrfs_super_attr_blocksize.attr,
- NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- struct btrfs_super_attr *a = container_of(attr,
- struct btrfs_super_attr,
- attr);
-
- return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
-
- return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- struct btrfs_root_attr *a = container_of(attr,
- struct btrfs_root_attr,
- attr);
- return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
- struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
- super_kobj);
- complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
- struct btrfs_root *root = container_of(kobj, struct btrfs_root,
- root_kobj);
- complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
- .show = btrfs_super_attr_show,
- .store = btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
- .show = btrfs_root_attr_show,
- .store = btrfs_root_attr_store,
-};
-
/* /sys/fs/btrfs/ entry */
static struct kset *btrfs_kset;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b9fe98..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
+ /*
+ * see below for in_trans_setup usage rules
+ * we have the reloc mutex held now, so there
+ * is only one writer in this function
+ */
+ root->in_trans_setup = 1;
+
+ /* make sure readers find in_trans_setup before
+ * they find our root->last_trans update
+ */
+ smp_wmb();
+
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
- root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ root->last_trans = trans->transid;
+
+ /* this is pretty tricky. We don't want to
+ * take the relocation lock in btrfs_record_root_in_trans
+ * unless we're really doing the first setup for this root in
+ * this transaction.
+ *
+ * Normally we'd use root->last_trans as a flag to decide
+ * if we want to take the expensive mutex.
+ *
+ * But, we have to set root->last_trans before we
+ * init the relocation root, otherwise, we trip over warnings
+ * in ctree.c. The solution used here is to flag ourselves
+ * with root->in_trans_setup. When this is 1, we're still
+ * fixing up the reloc trees and everyone must wait.
+ *
+ * When this is zero, they can trust root->last_trans and fly
+ * through btrfs_record_root_in_trans without having to take the
+ * lock. smp_wmb() makes sure that all the writes above are
+ * done before we pop in the zero below
+ */
btrfs_init_reloc_root(trans, root);
+ smp_wmb();
+ root->in_trans_setup = 0;
}
return 0;
}
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (!root->ref_cows)
+ return 0;
+
+ /*
+ * see record_root_in_trans for comments about in_trans_setup usage
+ * and barriers
+ */
+ smp_rmb();
+ if (root->last_trans == trans->transid &&
+ !root->in_trans_setup)
+ return 0;
+
+ mutex_lock(&root->fs_info->reloc_mutex);
+ record_root_in_trans(trans, root);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
+ return 0;
+}
+
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
- btrfs_record_root_in_trans(trans, parent_root);
+ record_root_in_trans(trans, parent_root);
/*
* insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- btrfs_record_root_in_trans(trans, root);
+ /*
+ * pull in the delayed directory update
+ * and the delayed inode item
+ * otherwise we corrupt the FS during
+ * snapshot
+ */
+ ret = btrfs_run_delayed_items(trans, root);
+ BUG_ON(ret);
+
+ record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
int ret;
list_for_each_entry(pending, head, list) {
- /*
- * We must deal with the delayed items before creating
- * snapshots, or we will create a snapthot with inconsistent
- * information.
- */
- ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
- BUG_ON(ret);
-
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
@@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
- ret = create_pending_snapshots(trans, root->fs_info);
- BUG_ON(ret);
+ /*
+ * Ok, now we need to block out any other joins while we commit the
+ * transaction. A join could have started before we set trans_no_join,
+ * so wait until num_writers has dropped back to 1 again.
+ */
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+
+ /*
+ * the reloc mutex makes sure that we stop
+ * the balancing code from coming in and moving
+ * extents around in the middle of the commit
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
+ ret = create_pending_snapshots(trans, root->fs_info);
+ BUG_ON(ret);
+
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
+ /*
+ * make sure none of the code above managed to slip in a
+ * delayed item
+ */
+ btrfs_assert_delayed_root_empty(root);
+
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- BUG_ON(!wc.replay_dest);
+ BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);