summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author Kees Cook <keescook@chromium.org> 2016-07-25 23:50:36 +0300
committer Kees Cook <keescook@chromium.org> 2016-07-25 23:50:36 +0300
commit 74e630a7582e6b3cb39559d712a0049f08dea8a0 (patch)
tree 98a752412dcfc74d802024c1d9e8c541b93174f9 /fs
parent 35da60941e44dbf57868e67686dd24cc1a33125a (diff)
parent 523d939ef98fd712632d93a5a2b588e477a7565e (diff)
download linux-74e630a7582e6b3cb39559d712a0049f08dea8a0.tar.xz
Merge tag 'v4.7' into for-linus/pstore
Linux 4.7
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/vfs_file.c 6
-rw-r--r-- fs/9p/vfs_inode.c 2
-rw-r--r-- fs/9p/vfs_inode_dotl.c 2
-rw-r--r-- fs/autofs4/autofs_i.h 8
-rw-r--r-- fs/autofs4/expire.c 27
-rw-r--r-- fs/autofs4/root.c 2
-rw-r--r-- fs/autofs4/waitq.c 7
-rw-r--r-- fs/binfmt_elf.c 2
-rw-r--r-- fs/binfmt_elf_fdpic.c 2
-rw-r--r-- fs/btrfs/check-integrity.c 2
-rw-r--r-- fs/btrfs/ctree.c 19
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/delayed-inode.c 27
-rw-r--r-- fs/btrfs/delayed-inode.h 10
-rw-r--r-- fs/btrfs/disk-io.c 54
-rw-r--r-- fs/btrfs/disk-io.h 2
-rw-r--r-- fs/btrfs/extent-tree.c 33
-rw-r--r-- fs/btrfs/extent_io.c 42
-rw-r--r-- fs/btrfs/extent_io.h 4
-rw-r--r-- fs/btrfs/file.c 44
-rw-r--r-- fs/btrfs/free-space-cache.c 18
-rw-r--r-- fs/btrfs/hash.c 5
-rw-r--r-- fs/btrfs/hash.h 1
-rw-r--r-- fs/btrfs/inode.c 35
-rw-r--r-- fs/btrfs/ordered-data.c 9
-rw-r--r-- fs/btrfs/ordered-data.h 2
-rw-r--r-- fs/btrfs/reada.c 2
-rw-r--r-- fs/btrfs/scrub.c 50
-rw-r--r-- fs/btrfs/super.c 61
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 8
-rw-r--r-- fs/btrfs/tests/btrfs-tests.h 27
-rw-r--r-- fs/btrfs/tests/extent-buffer-tests.c 13
-rw-r--r-- fs/btrfs/tests/extent-io-tests.c 86
-rw-r--r-- fs/btrfs/tests/free-space-tests.c 76
-rw-r--r-- fs/btrfs/tests/free-space-tree-tests.c 30
-rw-r--r-- fs/btrfs/tests/inode-tests.c 344
-rw-r--r-- fs/btrfs/tests/qgroup-tests.c 111
-rw-r--r-- fs/btrfs/transaction.c 10
-rw-r--r-- fs/btrfs/transaction.h 2
-rw-r--r-- fs/btrfs/tree-log.c 4
-rw-r--r-- fs/btrfs/volumes.c 149
-rw-r--r-- fs/cachefiles/interface.c 2
-rw-r--r-- fs/ceph/addr.c 6
-rw-r--r-- fs/ceph/cache.c 141
-rw-r--r-- fs/ceph/cache.h 44
-rw-r--r-- fs/ceph/caps.c 23
-rw-r--r-- fs/ceph/export.c 10
-rw-r--r-- fs/ceph/file.c 29
-rw-r--r-- fs/ceph/super.h 4
-rw-r--r-- fs/cifs/cifs_unicode.c 33
-rw-r--r-- fs/cifs/cifs_unicode.h 2
-rw-r--r-- fs/cifs/cifsfs.c 3
-rw-r--r-- fs/cifs/cifsglob.h 1
-rw-r--r-- fs/cifs/connect.c 4
-rw-r--r-- fs/cifs/dir.c 2
-rw-r--r-- fs/cifs/file.c 14
-rw-r--r-- fs/cifs/ntlmssp.h 2
-rw-r--r-- fs/cifs/sess.c 80
-rw-r--r-- fs/cifs/smb2pdu.c 37
-rw-r--r-- fs/configfs/file.c 2
-rw-r--r-- fs/coredump.c 4
-rw-r--r-- fs/dax.c 7
-rw-r--r-- fs/dcache.c 79
-rw-r--r-- fs/debugfs/file.c 7
-rw-r--r-- fs/devpts/inode.c 191
-rw-r--r-- fs/ecryptfs/crypto.c 8
-rw-r--r-- fs/ecryptfs/file.c 19
-rw-r--r-- fs/ecryptfs/main.c 3
-rw-r--r-- fs/fs-writeback.c 2
-rw-r--r-- fs/fscache/page.c 2
-rw-r--r-- fs/fuse/dir.c 6
-rw-r--r-- fs/fuse/fuse_i.h 9
-rw-r--r-- fs/fuse/inode.c 19
-rw-r--r-- fs/gfs2/inode.c 2
-rw-r--r-- fs/internal.h 1
-rw-r--r-- fs/jbd2/journal.c 32
-rw-r--r-- fs/libfs.c 113
-rw-r--r-- fs/lockd/svc.c 13
-rw-r--r-- fs/locks.c 2
-rw-r--r-- fs/namei.c 110
-rw-r--r-- fs/namespace.c 11
-rw-r--r-- fs/nfs/dir.c 41
-rw-r--r-- fs/nfs/direct.c 10
-rw-r--r-- fs/nfs/inode.c 1
-rw-r--r-- fs/nfs/nfs4proc.c 18
-rw-r--r-- fs/nfs/nfs4state.c 2
-rw-r--r-- fs/nfs/pnfs.c 10
-rw-r--r-- fs/nfs/pnfs_nfs.c 12
-rw-r--r-- fs/nfs/read.c 4
-rw-r--r-- fs/nfsd/blocklayout.c 2
-rw-r--r-- fs/nfsd/nfs2acl.c 20
-rw-r--r-- fs/nfsd/nfs3acl.c 16
-rw-r--r-- fs/nfsd/nfs4acl.c 16
-rw-r--r-- fs/nfsd/nfs4callback.c 18
-rw-r--r-- fs/nfsd/nfs4state.c 67
-rw-r--r-- fs/nfsd/state.h 2
-rw-r--r-- fs/nilfs2/the_nilfs.c 2
-rw-r--r-- fs/ocfs2/Makefile 2
-rw-r--r-- fs/ocfs2/buffer_head_io.c 5
-rw-r--r-- fs/overlayfs/dir.c 67
-rw-r--r-- fs/overlayfs/inode.c 61
-rw-r--r-- fs/overlayfs/overlayfs.h 1
-rw-r--r-- fs/overlayfs/super.c 12
-rw-r--r-- fs/posix_acl.c 42
-rw-r--r-- fs/proc/root.c 7
-rw-r--r-- fs/reiserfs/super.c 9
-rw-r--r-- fs/ubifs/file.c 24
-rw-r--r-- fs/udf/partition.c 13
-rw-r--r-- fs/udf/super.c 22
-rw-r--r-- fs/udf/udf_sb.h 5
-rw-r--r-- fs/xfs/xfs_ioctl.c 6
111 files changed, 1739 insertions, 1197 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b84c291ba1eb..d7b78d531e63 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
- fid = v9fs_fid_clone(file->f_path.dentry);
+ fid = v9fs_fid_clone(file_dentry(file));
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(file->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(file));
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(filp->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(filp));
if (IS_ERR(fid)) {
retval = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f4645c515262..e2e7c749925a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -853,7 +853,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct p9_fid *fid, *inode_fid;
struct dentry *res = NULL;
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a34702c998f5..1b51eaa5e2dd 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -254,7 +254,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f0d268b97d19..a439548de785 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -70,9 +70,13 @@ struct autofs_info {
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */
-#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered
* for expiry, so RCU_walk is
- * not permitted
+ * not permitted. If it progresses to
+ * actual expiry attempt, the flag is
+ * not cleared when EXPIRING is set -
+ * in that case it gets cleared only
+ * when it comes to clearing EXPIRING.
*/
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 9510d8d2e9cd..b493909e7492 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
spin_lock(&sbi->fs_lock);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
}
out:
spin_unlock(&sbi->fs_lock);
@@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_NO_RCU)
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
expired = NULL;
else
expired = should_expire(dentry, mnt, timeout, how);
@@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
continue;
}
ino = autofs4_dentry_ino(expired);
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
spin_lock(&sbi->fs_lock);
@@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
goto found;
}
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
if (expired != dentry)
dput(expired);
spin_unlock(&sbi->fs_lock);
@@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
found:
pr_debug("returning %p %pd\n", expired, expired);
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&sbi->lookup_lock);
- spin_lock(&expired->d_parent->d_lock);
- spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_child);
- spin_unlock(&expired->d_lock);
- spin_unlock(&expired->d_parent->d_lock);
- spin_unlock(&sbi->lookup_lock);
return expired;
}
@@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
int status;
/* Block on any pending expire */
- if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
return 0;
if (rcu_walk)
return -ECHILD;
@@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb,
ino = autofs4_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 78bd80298528..3767f6641af1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
*/
struct inode *inode;
- if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
return 0;
if (d_mountpoint(dentry))
return 0;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0146d911f468..631f1554c87b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi,
set_fs(KERNEL_DS);
mutex_lock(&sbi->pipe_mutex);
- wr = __vfs_write(file, data, bytes, &file->f_pos);
- while (bytes && wr) {
+ while (bytes) {
+ wr = __vfs_write(file, data, bytes, &file->f_pos);
+ if (wr <= 0)
+ break;
data += wr;
bytes -= wr;
- wr = __vfs_write(file, data, bytes, &file->f_pos);
}
mutex_unlock(&sbi->pipe_mutex);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e158b22ef32f..a7a28110dc80 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Align to page */
- if (!dump_skip(cprm, dataoff - cprm->file->f_pos))
+ if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 71ade0e556b7..203589311bf8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
}
- if (!dump_skip(cprm, dataoff - cprm->file->f_pos))
+ if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
if (!elf_fdpic_dump_segments(cprm))
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b677a6ea6001..7706c8dc5fa6 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2645,7 +2645,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
* This algorithm is recursive because the amount of used stack space
* is very small and the max recursion depth is limited.
*/
- indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
+ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
btrfsic_get_block_type(state, block),
block->logical_bytenr, block->dev_state->name,
block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 427c36b430a6..a85cf7d23309 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
- eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
+ eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start,
+ eb->len);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
@@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
} else if (old_root) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
- eb = alloc_dummy_extent_buffer(root->fs_info, logical);
+ eb = alloc_dummy_extent_buffer(root->fs_info, logical,
+ root->nodesize);
} else {
btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
eb = btrfs_clone_extent_buffer(eb_root);
@@ -1552,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
+ trans->dirty = true;
*cow_ret = buf;
return 0;
}
@@ -1783,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
if (!err) {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
map_start);
- } else {
+ } else if (err == 1) {
read_extent_buffer(eb, &unaligned,
offset, sizeof(unaligned));
tmp = &unaligned;
+ } else {
+ return err;
}
} else {
@@ -2510,6 +2515,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
if (!btrfs_buffer_uptodate(tmp, 0, 0))
ret = -EIO;
free_extent_buffer(tmp);
+ } else {
+ ret = PTR_ERR(tmp);
}
return ret;
}
@@ -2773,8 +2780,10 @@ again:
* then we don't want to set the path blocking,
* so we test it here
*/
- if (!should_cow_block(trans, root, b))
+ if (!should_cow_block(trans, root, b)) {
+ trans->dirty = true;
goto cow_done;
+ }
/*
* must have write locks on this node and the
@@ -2823,6 +2832,8 @@ cow_done:
}
ret = key_search(b, key, level, &prev_cmp, &slot);
+ if (ret < 0)
+ goto done;
if (level != 0) {
int dec = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 101c3cfd3f7c..4274a7bfdaed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
- unsigned long count, int wait);
+ unsigned long count, u64 transid, int wait);
int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2a3f96..d3aaabbfada0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return 0;
}
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list)
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
- return;
+ return false;
+
+ /*
+ * We can only do one readdir with delayed items at a time because of
+ * item->readdir_list.
+ */
+ inode_unlock_shared(inode);
+ inode_lock(inode);
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
* requeue or dequeue this delayed node.
*/
atomic_dec(&delayed_node->refs);
+
+ return true;
}
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list)
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_item *curr, *next;
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
if (atomic_dec_and_test(&curr->refs))
kfree(curr);
}
+
+ /*
+ * The VFS is going to do up_read(), so we need to downgrade back to a
+ * read lock.
+ */
+ downgrade_write(&inode->i_rwsem);
}
int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853c84ae..2495b3d4075f 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
/* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list);
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6628fca9f4ed..60ce1190307b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1098,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
struct inode *btree_inode = root->fs_info->btree_inode;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, WAIT_NONE, btree_get_extent, 0);
@@ -1114,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return 0;
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
@@ -1147,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
- return alloc_test_extent_buffer(root->fs_info, bytenr);
+ return alloc_test_extent_buffer(root->fs_info, bytenr,
+ root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
}
@@ -1171,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret) {
@@ -1314,14 +1315,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(void)
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
root = btrfs_alloc_root(NULL, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(4096, 4096, 4096, root, NULL, 1);
+ /* We don't use the stripesize in selftest, set it as sectorsize */
+ __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ BTRFS_ROOT_TREE_OBJECTID);
set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
@@ -1803,6 +1806,13 @@ static int cleaner_kthread(void *arg)
if (btrfs_need_cleaner_sleep(root))
goto sleep;
+ /*
+ * Do not do anything if we might cause open_ctree() to block
+ * before we have finished mounting the filesystem.
+ */
+ if (!root->fs_info->open)
+ goto sleep;
+
if (!mutex_trylock(&root->fs_info->cleaner_mutex))
goto sleep;
@@ -2517,7 +2527,6 @@ int open_ctree(struct super_block *sb,
int num_backups_tried = 0;
int backup_index = 0;
int max_active;
- bool cleaner_mutex_locked = false;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2797,7 +2806,7 @@ int open_ctree(struct super_block *sb,
nodesize = btrfs_super_nodesize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
- stripesize = btrfs_super_stripesize(disk_super);
+ stripesize = sectorsize;
fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
@@ -2996,13 +3005,6 @@ retry_root_backup:
goto fail_sysfs;
}
- /*
- * Hold the cleaner_mutex thread here so that we don't block
- * for a long time on btrfs_recover_relocation. cleaner_kthread
- * will wait for us to finish mounting the filesystem.
- */
- mutex_lock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = true;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
@@ -3062,8 +3064,10 @@ retry_root_backup:
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret)
goto fail_qgroup;
- /* We locked cleaner_mutex before creating cleaner_kthread. */
+
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d",
ret);
@@ -3071,8 +3075,6 @@ retry_root_backup:
goto fail_qgroup;
}
}
- mutex_unlock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = false;
location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3186,10 +3188,6 @@ fail_cleaner:
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
fail_sysfs:
- if (cleaner_mutex_locked) {
- mutex_unlock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = false;
- }
btrfs_sysfs_remove_mounted(fs_info);
fail_fsdev_sysfs:
@@ -4130,6 +4128,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
* Hint to catch really bogus numbers, bitflips or so, more exact checks are
* done later
*/
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ btrfs_err(fs_info, "bytes_used is too small %llu",
+ btrfs_super_bytes_used(sb));
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+ btrfs_err(fs_info, "invalid stripesize %u",
+ btrfs_super_stripesize(sb));
+ ret = -EINVAL;
+ }
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8e79d0070bcf..acba821499a9 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(void);
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize);
#endif
/*
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a400951e8678..82b912a293ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
struct btrfs_bio *bbio = NULL;
+ /*
+ * Avoid races with device replace and make sure our bbio has devices
+ * associated to its stripes that don't go away while we are discarding.
+ */
+ btrfs_bio_counter_inc_blocked(root->fs_info);
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
bytenr, &num_bytes, &bbio, 0);
@@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
}
btrfs_put_bbio(bbio);
}
+ btrfs_bio_counter_dec(root->fs_info);
if (actual_bytes)
*actual_bytes = discarded_bytes;
@@ -2829,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct async_delayed_refs {
struct btrfs_root *root;
+ u64 transid;
int count;
int error;
int sync;
@@ -2844,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work)
async = container_of(work, struct async_delayed_refs, work);
+ /* if the commit is already started, we don't need to wait here */
+ if (btrfs_transaction_blocked(async->root->fs_info))
+ goto done;
+
trans = btrfs_join_transaction(async->root);
if (IS_ERR(trans)) {
async->error = PTR_ERR(trans);
@@ -2855,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work)
* wait on delayed refs
*/
trans->sync = true;
+
+ /* Don't bother flushing if we got into a different transaction */
+ if (trans->transid > async->transid)
+ goto end;
+
ret = btrfs_run_delayed_refs(trans, async->root, async->count);
if (ret)
async->error = ret;
-
+end:
ret = btrfs_end_transaction(trans, async->root);
if (ret && !async->error)
async->error = ret;
@@ -2870,7 +2886,7 @@ done:
}
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
- unsigned long count, int wait)
+ unsigned long count, u64 transid, int wait)
{
struct async_delayed_refs *async;
int ret;
@@ -2882,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
async->root = root->fs_info->tree_root;
async->count = count;
async->error = 0;
+ async->transid = transid;
if (wait)
async->sync = 1;
else
@@ -8010,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
+
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
@@ -8038,7 +8056,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
}
- trans->blocks_used++;
+ trans->dirty = true;
/* this returns a buffer locked for blocking */
return buf;
}
@@ -8653,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = btrfs_find_tree_block(root->fs_info, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(root, bytenr);
- if (!next)
- return -ENOMEM;
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
level - 1);
reada = 1;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3cd57825c75f..75533adef998 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
bio->bi_iter.bi_size = 0;
map_length = length;
+ /*
+ * Avoid races with device replace and make sure our bbio has devices
+ * associated to its stripes that don't go away while we are doing the
+ * read repair operation.
+ */
+ btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
@@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
dev = bbio->stripes[mirror_num-1].dev;
btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
@@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
/* try to remap that extent elsewhere? */
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
@@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
btrfs_ino(inode), start,
rcu_str_deref(dev->name), sector);
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return 0;
}
@@ -4718,16 +4728,16 @@ err:
}
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+ u64 start, u32 nodesize)
{
unsigned long len;
if (!fs_info) {
/*
* Called only from tests that don't always have a fs_info
- * available, but we know that nodesize is 4096
+ * available
*/
- len = 4096;
+ len = nodesize;
} else {
len = fs_info->tree_root->nodesize;
}
@@ -4823,7 +4833,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+ u64 start, u32 nodesize)
{
struct extent_buffer *eb, *exists = NULL;
int ret;
@@ -4831,7 +4841,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
- eb = alloc_dummy_extent_buffer(fs_info, start);
+ eb = alloc_dummy_extent_buffer(fs_info, start, nodesize);
if (!eb)
return NULL;
eb->fs_info = fs_info;
@@ -4882,18 +4892,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
int uptodate = 1;
int ret;
+ if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) {
+ btrfs_err(fs_info, "bad tree block start %llu", start);
+ return ERR_PTR(-EINVAL);
+ }
+
eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
- if (!p)
+ if (!p) {
+ exists = ERR_PTR(-ENOMEM);
goto free_eb;
+ }
spin_lock(&mapping->private_lock);
if (PagePrivate(p)) {
@@ -4938,8 +4955,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again:
ret = radix_tree_preload(GFP_NOFS);
- if (ret)
+ if (ret) {
+ exists = ERR_PTR(ret);
goto free_eb;
+ }
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
@@ -5323,6 +5342,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
return ret;
}
+/*
+ * return 0 if the item is found within a page.
+ * return 1 if the item spans two pages.
+ * return -EINVAL otherwise.
+ */
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long min_len, char **map,
unsigned long *map_start,
@@ -5337,7 +5361,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
PAGE_SHIFT;
if (i != end_i)
- return -EINVAL;
+ return 1;
if (i == 0) {
offset = start_offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1baf19c9b79d..c0c1c4fef6ce 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start, u32 nodesize);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
@@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
u64 *end, u64 max_bytes);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start, u32 nodesize);
#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e0c9bd3fb02d..2234e88cf674 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes = round_up(write_bytes + sector_offset,
root->sectorsize);
- if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) &&
- check_can_nocow(inode, pos, &write_bytes) > 0) {
- /*
- * For nodata cow case, no need to reserve
- * data space.
- */
- only_release_metadata = true;
- /*
- * our prealloc extent may be smaller than
- * write_bytes, so scale down.
- */
- num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_SIZE);
- reserve_bytes = round_up(write_bytes + sector_offset,
- root->sectorsize);
- goto reserve_metadata;
- }
-
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
- if (ret < 0)
- break;
+ if (ret < 0) {
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ check_can_nocow(inode, pos, &write_bytes) > 0) {
+ /*
+ * For nodata cow case, no need to reserve
+ * data space.
+ */
+ only_release_metadata = true;
+ /*
+ * our prealloc extent may be smaller than
+ * write_bytes, so scale down.
+ */
+ num_pages = DIV_ROUND_UP(write_bytes + offset,
+ PAGE_SIZE);
+ reserve_bytes = round_up(write_bytes +
+ sector_offset,
+ root->sectorsize);
+ } else {
+ break;
+ }
+ }
-reserve_metadata:
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
if (ret) {
if (!only_release_metadata)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c6dc1183f542..69d270f6602c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -29,7 +29,7 @@
#include "inode-map.h"
#include "volumes.h"
-#define BITS_PER_BITMAP (PAGE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_32K
struct btrfs_trim_range {
@@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
u64 offset)
{
u64 bitmap_start;
- u32 bytes_per_bitmap;
+ u64 bytes_per_bitmap;
bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
bitmap_start = offset - ctl->start;
- bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
+ bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
bitmap_start *= bytes_per_bitmap;
bitmap_start += ctl->start;
@@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->key.offset;
- u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
- u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
+ u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+ u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
- max_bitmaps = max_t(u32, max_bitmaps, 1);
+ max_bitmaps = max_t(u64, max_bitmaps, 1);
ASSERT(ctl->total_bitmaps <= max_bitmaps);
@@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
* sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
* we add more bitmaps.
*/
- bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE;
+ bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
if (bitmap_bytes >= max_bytes) {
ctl->extents_thresh = 0;
@@ -3662,7 +3662,7 @@ have_info:
if (tmp->offset + tmp->bytes < offset)
break;
if (offset + bytes < tmp->offset) {
- n = rb_prev(&info->offset_index);
+ n = rb_prev(&tmp->offset_index);
continue;
}
info = tmp;
@@ -3676,7 +3676,7 @@ have_info:
if (offset + bytes < tmp->offset)
break;
if (tmp->offset + tmp->bytes < offset) {
- n = rb_next(&info->offset_index);
+ n = rb_next(&tmp->offset_index);
continue;
}
info = tmp;
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index aae520b2aee5..a97fdc156a03 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -24,6 +24,11 @@ int __init btrfs_hash_init(void)
return PTR_ERR_OR_ZERO(tfm);
}
+const char* btrfs_crc32c_impl(void)
+{
+ return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
+}
+
void btrfs_hash_exit(void)
{
crypto_free_shash(tfm);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 118a2316e5d3..c3a2ec554361 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -22,6 +22,7 @@
int __init btrfs_hash_init(void);
void btrfs_hash_exit(void);
+const char* btrfs_crc32c_impl(void);
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 270499598ed4..4421954720b8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3271,7 +3271,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* grab metadata reservation from transaction handle */
if (reserve) {
ret = btrfs_orphan_reserve_metadata(trans, inode);
- BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
+ ASSERT(!ret);
+ if (ret) {
+ atomic_dec(&root->orphan_inodes);
+ clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+ &BTRFS_I(inode)->runtime_flags);
+ if (insert)
+ clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ &BTRFS_I(inode)->runtime_flags);
+ return ret;
+ }
}
/* insert an orphan item to track this unlinked/truncated file */
@@ -4549,6 +4558,7 @@ delete:
BUG_ON(ret);
if (btrfs_should_throttle_delayed_refs(trans, root))
btrfs_async_run_delayed_refs(root,
+ trans->transid,
trans->delayed_ref_updates * 2, 0);
if (be_nice) {
if (truncate_space_check(trans, root,
@@ -5748,6 +5758,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
bool emitted;
+ bool put = false;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5765,7 +5776,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (key_type == BTRFS_DIR_INDEX_KEY) {
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- btrfs_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+ &del_list);
}
key.type = key_type;
@@ -5912,8 +5924,8 @@ next:
nopos:
ret = 0;
err:
- if (key_type == BTRFS_DIR_INDEX_KEY)
- btrfs_put_delayed_items(&ins_list, &del_list);
+ if (put)
+ btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
btrfs_free_path(path);
return ret;
}
@@ -6979,7 +6991,18 @@ insert:
* existing will always be non-NULL, since there must be
* extent causing the -EEXIST.
*/
- if (start >= extent_map_end(existing) ||
+ if (existing->start == em->start &&
+ extent_map_end(existing) == extent_map_end(em) &&
+ em->block_start == existing->block_start) {
+ /*
+ * these two extents are the same, it happens
+ * with inlines especially
+ */
+ free_extent_map(em);
+ em = existing;
+ err = 0;
+
+ } else if (start >= extent_map_end(existing) ||
start <= existing->start) {
/*
* The existing extent map is the one nearest to
@@ -10514,7 +10537,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = btrfs_real_readdir,
+ .iterate_shared = btrfs_real_readdir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 559170464d7c..aca8264f4a49 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
return count;
}
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
struct list_head splice;
int done;
+ int total_done = 0;
INIT_LIST_HEAD(&splice);
@@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
done = btrfs_wait_ordered_extents(root, nr,
range_start, range_len);
btrfs_put_fs_root(root);
+ total_done += done;
spin_lock(&fs_info->ordered_root_lock);
if (nr != -1) {
@@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
mutex_unlock(&fs_info->ordered_operations_mutex);
+
+ return total_done;
}
/*
@@ -964,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct rb_node *prev = NULL;
struct btrfs_ordered_extent *test;
int ret = 1;
+ u64 orig_offset = offset;
spin_lock_irq(&tree->lock);
if (ordered) {
@@ -979,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
/* truncate file */
if (disk_i_size > i_size) {
- BTRFS_I(inode)->disk_i_size = i_size;
+ BTRFS_I(inode)->disk_i_size = orig_offset;
ret = 0;
goto out;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 2049c9be85ee..451507776ff5 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len);
void btrfs_get_logged_extents(struct inode *inode,
struct list_head *logged_list,
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 298631eaee78..8428db7cd88f 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
do {
enqueued = 0;
+ mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (atomic_read(&device->reada_in_flight) <
MAX_IN_FLIGHT)
enqueued += reada_start_machine_dev(fs_info,
device);
}
+ mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued;
} while (enqueued && total < 10000);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 46d847f66e4b..70427ef66b04 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache);
+ if (!ret && is_dev_replace) {
+ /*
+ * If we are doing a device replace wait for any tasks
+ * that started delalloc right before we set the block
+ * group to RO mode, as they might have just allocated
+ * an extent from it or decided they could do a nocow
+ * write. And if any such tasks did that, wait for their
+ * ordered extents to complete and then commit the
+ * current transaction, so that we can later see the new
+ * extent items in the extent tree - the ordered extents
+ * create delayed data references (for cow writes) when
+ * they complete, which will be run and insert the
+ * corresponding extent items into the extent tree when
+ * we commit the transaction they used when running
+ * inode.c:btrfs_finish_ordered_io(). We later use
+ * the commit root of the extent tree to find extents
+ * to copy from the srcdev into the tgtdev, and we don't
+ * want to miss any new extents.
+ */
+ btrfs_wait_block_group_reservations(cache);
+ btrfs_wait_nocow_writers(cache);
+ ret = btrfs_wait_ordered_roots(fs_info, -1,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret > 0) {
+ struct btrfs_trans_handle *trans;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ ret = PTR_ERR(trans);
+ else
+ ret = btrfs_commit_transaction(trans,
+ root);
+ if (ret) {
+ scrub_pause_off(fs_info);
+ btrfs_put_block_group(cache);
+ break;
+ }
+ }
+ }
scrub_pause_off(fs_info);
if (ret == 0) {
@@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, cache, is_dev_replace);
@@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+ dev_replace->cursor_left = dev_replace->cursor_right;
+ dev_replace->item_needs_writeback = 1;
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+
if (ro_set)
btrfs_dec_block_group_ro(root, cache);
@@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ret = -ENOMEM;
break;
}
-
- dev_replace->cursor_left = dev_replace->cursor_right;
- dev_replace->item_needs_writeback = 1;
skip:
key.offset = found_key.offset + length;
btrfs_release_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4e59a91a11e0..60e7179ed4b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
- if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
+ if (!trans->dirty && list_empty(&trans->new_bgs)) {
const char *errstr;
errstr = btrfs_decode_error(errno);
@@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
}
sb->s_flags &= ~MS_RDONLY;
+
+ fs_info->open = 1;
}
out:
wake_up_process(fs_info->transaction_kthread);
@@ -2303,7 +2305,7 @@ static void btrfs_interface_exit(void)
static void btrfs_print_mod_info(void)
{
- printk(KERN_INFO "Btrfs loaded"
+ printk(KERN_INFO "Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
@@ -2313,33 +2315,48 @@ static void btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on"
#endif
- "\n");
+ "\n",
+ btrfs_crc32c_impl());
}
static int btrfs_run_sanity_tests(void)
{
- int ret;
-
+ int ret, i;
+ u32 sectorsize, nodesize;
+ u32 test_sectorsize[] = {
+ PAGE_SIZE,
+ };
ret = btrfs_init_test_fs();
if (ret)
return ret;
-
- ret = btrfs_test_free_space_cache();
- if (ret)
- goto out;
- ret = btrfs_test_extent_buffer_operations();
- if (ret)
- goto out;
- ret = btrfs_test_extent_io();
- if (ret)
- goto out;
- ret = btrfs_test_inodes();
- if (ret)
- goto out;
- ret = btrfs_test_qgroups();
- if (ret)
- goto out;
- ret = btrfs_test_free_space_tree();
+ for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
+ sectorsize = test_sectorsize[i];
+ for (nodesize = sectorsize;
+ nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
+ nodesize <<= 1) {
+ pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
+ sectorsize, nodesize);
+ ret = btrfs_test_free_space_cache(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations(sectorsize,
+ nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_qgroups(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_free_space_tree(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ }
+ }
out:
btrfs_destroy_test_fs();
return ret;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index f54bf450bad3..02223f3f78f4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -68,7 +68,7 @@ int btrfs_init_test_fs(void)
if (IS_ERR(test_mnt)) {
printk(KERN_ERR "btrfs: cannot mount test file system\n");
unregister_filesystem(&test_type);
- return ret;
+ return PTR_ERR(test_mnt);
}
return 0;
}
@@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
}
struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length)
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize)
{
struct btrfs_block_group_cache *cache;
@@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length)
cache->key.objectid = 0;
cache->key.offset = length;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = 4096;
- cache->full_stripe_len = 4096;
+ cache->sectorsize = sectorsize;
+ cache->full_stripe_len = sectorsize;
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 054b8c73c951..66fb6b701eb7 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -26,27 +26,28 @@
struct btrfs_root;
struct btrfs_trans_handle;
-int btrfs_test_free_space_cache(void);
-int btrfs_test_extent_buffer_operations(void);
-int btrfs_test_extent_io(void);
-int btrfs_test_inodes(void);
-int btrfs_test_qgroups(void);
-int btrfs_test_free_space_tree(void);
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
int btrfs_init_test_fs(void);
void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length);
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
#else
-static inline int btrfs_test_free_space_cache(void)
+static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_extent_buffer_operations(void)
+static inline int btrfs_test_extent_buffer_operations(u32 sectorsize,
+ u32 nodesize)
{
return 0;
}
@@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void)
static inline void btrfs_destroy_test_fs(void)
{
}
-static inline int btrfs_test_extent_io(void)
+static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_inodes(void)
+static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_qgroups(void)
+static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_free_space_tree(void)
+static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
{
return 0;
}
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index f51963a8f929..4f8cbd1ec5ee 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -22,7 +22,7 @@
#include "../extent_io.h"
#include "../disk-io.h"
-static int test_btrfs_split_item(void)
+static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
{
struct btrfs_path *path;
struct btrfs_root *root;
@@ -40,7 +40,7 @@ static int test_btrfs_split_item(void)
test_msg("Running btrfs_split_item tests\n");
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Could not allocate root\n");
return PTR_ERR(root);
@@ -53,7 +53,8 @@ static int test_btrfs_split_item(void)
return -ENOMEM;
}
- path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
+ path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize,
+ nodesize);
if (!eb) {
test_msg("Could not allocate dummy buffer\n");
ret = -ENOMEM;
@@ -222,8 +223,8 @@ out:
return ret;
}
-int btrfs_test_extent_buffer_operations(void)
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
{
- test_msg("Running extent buffer operation tests");
- return test_btrfs_split_item();
+ test_msg("Running extent buffer operation tests\n");
+ return test_btrfs_split_item(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 55724607f79b..d19ab0317283 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/sizes.h>
#include "btrfs-tests.h"
+#include "../ctree.h"
#include "../extent_io.h"
#define PROCESS_UNLOCK (1 << 0)
@@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
return count;
}
-static int test_find_delalloc(void)
+static int test_find_delalloc(u32 sectorsize)
{
struct inode *inode;
struct extent_io_tree tmp;
@@ -113,7 +114,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, 4095, NULL);
+ set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -122,9 +123,9 @@ static int test_find_delalloc(void)
test_msg("Should have found at least one delalloc\n");
goto out_bits;
}
- if (start != 0 || end != 4095) {
- test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
- start, end);
+ if (start != 0 || end != (sectorsize - 1)) {
+ test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+ sectorsize - 1, start, end);
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -144,7 +145,7 @@ static int test_find_delalloc(void)
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
- set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL);
+ set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -172,7 +173,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- test_start = max_bytes + 4096;
+ test_start = max_bytes + sectorsize;
locked_page = find_lock_page(inode->i_mapping, test_start >>
PAGE_SHIFT);
if (!locked_page) {
@@ -272,6 +273,16 @@ out:
return ret;
}
+/**
+ * test_bit_in_byte - Determine whether a bit is set in a byte
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit_in_byte(int nr, const u8 *addr)
+{
+ return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+}
+
static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
unsigned long len)
{
@@ -298,25 +309,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
return -EINVAL;
}
- bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Setting straddling pages failed\n");
- return -EINVAL;
- }
+ /* Straddling pages test */
+ if (len > PAGE_SIZE) {
+ bitmap_set(bitmap,
+ (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ sizeof(long) * BITS_PER_BYTE);
+ extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ test_msg("Setting straddling pages failed\n");
+ return -EINVAL;
+ }
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- bitmap_clear(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Clearing straddling pages failed\n");
- return -EINVAL;
+ bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
+ bitmap_clear(bitmap,
+ (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ sizeof(long) * BITS_PER_BYTE);
+ extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
+ extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ test_msg("Clearing straddling pages failed\n");
+ return -EINVAL;
+ }
}
/*
@@ -333,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
for (i = 0; i < len * BITS_PER_BYTE; i++) {
int bit, bit1;
- bit = !!test_bit(i, bitmap);
+ bit = !!test_bit_in_byte(i, (u8 *)bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
test_msg("Testing bit pattern failed\n");
@@ -351,15 +366,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
return 0;
}
-static int test_eb_bitmaps(void)
+static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
{
- unsigned long len = PAGE_SIZE * 4;
+ unsigned long len;
unsigned long *bitmap;
struct extent_buffer *eb;
int ret;
test_msg("Running extent buffer bitmap tests\n");
+ /*
+ * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
+ * BTRFS_MAX_METADATA_BLOCKSIZE.
+ */
+ len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
+ ? sectorsize * 4 : sectorsize;
+
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
test_msg("Couldn't allocate test bitmap\n");
@@ -379,7 +401,7 @@ static int test_eb_bitmaps(void)
/* Do it over again with an extent buffer which isn't page-aligned. */
free_extent_buffer(eb);
- eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
+ eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
if (!eb) {
test_msg("Couldn't allocate test extent buffer\n");
kfree(bitmap);
@@ -393,17 +415,17 @@ out:
return ret;
}
-int btrfs_test_extent_io(void)
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
test_msg("Running extent I/O tests\n");
- ret = test_find_delalloc();
+ ret = test_find_delalloc(sectorsize);
if (ret)
goto out;
- ret = test_eb_bitmaps();
+ ret = test_eb_bitmaps(sectorsize, nodesize);
out:
test_msg("Extent I/O tests finished\n");
return ret;
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 0eeb8f3d6b67..3956bb2ff84c 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -22,7 +22,7 @@
#include "../disk-io.h"
#include "../free-space-cache.h"
-#define BITS_PER_BITMAP (PAGE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
/*
* This test just does basic sanity checking, making sure we can add an extent
@@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache)
return 0;
}
-static int test_bitmaps(struct btrfs_block_group_cache *cache)
+static int test_bitmaps(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
u64 next_bitmap_offset;
int ret;
@@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
* The first bitmap we have starts at offset 0 so the next one is just
* at the end of the first bitmap.
*/
- next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
/* Test a bit straddling two bitmaps */
ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
@@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
}
/* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
- u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
int ret;
test_msg("Running bitmap and extent tests\n");
@@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
* requests.
*/
static int
-test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
+test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
int ret;
u64 offset;
@@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096);
+ ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) {
- test_msg("Cache free space is not 1Mb + 4Kb\n");
+ if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
+ test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
return -EINVAL;
}
@@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -EINVAL;
}
- /* All that remains is a 4Kb free space region in a bitmap. Confirm. */
+ /*
+ * All that remains is a sectorsize free space region in a bitmap.
+ * Confirm.
+ */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
- if (cache->free_space_ctl->free_space != 4096) {
- test_msg("Cache free space is not 4Kb\n");
+ if (cache->free_space_ctl->free_space != sectorsize) {
+ test_msg("Cache free space is not %u\n", sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
- 0, 4096, 0,
+ 0, sectorsize, 0,
&max_extent_size);
if (offset != (SZ_128M + SZ_16M)) {
- test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n",
- offset);
+ test_msg("Failed to allocate %u, returned offset : %llu\n",
+ sectorsize, offset);
return -EINVAL;
}
@@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, SZ_32M, 8192);
+ ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
/*
* Confirm that our extent entry didn't stole all free space from the
- * bitmap, because of the small 8Kb free space region.
+ * bitmap, because of the small 2 * sectorsize free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
@@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) {
- test_msg("Cache free space is not 1Mb + 8Kb\n");
+ if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
+ test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
return -EINVAL;
}
@@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -EINVAL;
}
- /* All that remains is a 8Kb free space region in a bitmap. Confirm. */
+ /*
+ * All that remains is a 2 * sectorsize free space region
+ * in a bitmap. Confirm.
+ */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
- if (cache->free_space_ctl->free_space != 8192) {
- test_msg("Cache free space is not 8Kb\n");
+ if (cache->free_space_ctl->free_space != 2 * sectorsize) {
+ test_msg("Cache free space is not %u\n", 2 * sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
- 0, 8192, 0,
+ 0, 2 * sectorsize, 0,
&max_extent_size);
if (offset != SZ_32M) {
- test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n",
+ test_msg("Failed to allocate %u, offset: %llu\n",
+ 2 * sectorsize,
offset);
return -EINVAL;
}
@@ -825,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return 0;
}
-int btrfs_test_free_space_cache(void)
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
struct btrfs_block_group_cache *cache;
struct btrfs_root *root = NULL;
@@ -833,13 +843,19 @@ int btrfs_test_free_space_cache(void)
test_msg("Running btrfs free space cache tests\n");
- cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024);
+ /*
+ * For ppc64 (with 64k page size), bytes per bitmap might be
+ * larger than 1G. To make the bitmap test available on ppc64,
+ * allocate a dummy block group whose size crosses bitmaps.
+ */
+ cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize
+ + PAGE_SIZE, sectorsize);
if (!cache) {
test_msg("Couldn't run the tests\n");
return 0;
}
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
@@ -855,14 +871,14 @@ int btrfs_test_free_space_cache(void)
ret = test_extents(cache);
if (ret)
goto out;
- ret = test_bitmaps(cache);
+ ret = test_bitmaps(cache, sectorsize);
if (ret)
goto out;
- ret = test_bitmaps_and_extents(cache);
+ ret = test_bitmaps_and_extents(cache, sectorsize);
if (ret)
goto out;
- ret = test_steal_space_from_bitmap_to_extent(cache);
+ ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize);
out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 7cea4462acd5..aac507085ab0 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../disk-io.h"
@@ -30,7 +31,7 @@ struct free_space_extent {
* The test cases align their operations to this in order to hit some of the
* edge cases in the bitmap code.
*/
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096)
+#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
static int __check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *,
struct btrfs_block_group_cache *,
struct btrfs_path *);
-static int run_test(test_func_t test_func, int bitmaps)
+static int run_test(test_func_t test_func, int bitmaps,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root = NULL;
struct btrfs_block_group_cache *cache = NULL;
@@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps)
struct btrfs_path *path = NULL;
int ret;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate dummy root\n");
ret = PTR_ERR(root);
@@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps)
root->fs_info->free_space_root = root;
root->fs_info->tree_root = root;
- root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+ root->node = alloc_test_extent_buffer(root->fs_info,
+ nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
@@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps)
}
btrfs_set_header_level(root->node, 0);
btrfs_set_header_nritems(root->node, 0);
- root->alloc_bytenr += 8192;
+ root->alloc_bytenr += 2 * nodesize;
- cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE);
+ cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
if (!cache) {
test_msg("Couldn't allocate dummy block group cache\n");
ret = -ENOMEM;
@@ -534,17 +537,18 @@ out:
return ret;
}
-static int run_test_both_formats(test_func_t test_func)
+static int run_test_both_formats(test_func_t test_func,
+ u32 sectorsize, u32 nodesize)
{
int ret;
- ret = run_test(test_func, 0);
+ ret = run_test(test_func, 0, sectorsize, nodesize);
if (ret)
return ret;
- return run_test(test_func, 1);
+ return run_test(test_func, 1, sectorsize, nodesize);
}
-int btrfs_test_free_space_tree(void)
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
{
test_func_t tests[] = {
test_empty_block_group,
@@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void)
test_msg("Running free space tree tests\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- int ret = run_test_both_formats(tests[i]);
+ int ret = run_test_both_formats(tests[i], sectorsize,
+ nodesize);
if (ret) {
- test_msg("%pf failed\n", tests[i]);
+ test_msg("%pf : sectorsize %u failed\n",
+ tests[i], sectorsize);
return ret;
}
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 8a25fe8b7c45..29648c0a39f1 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../btrfs_inode.h"
@@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root)
* diagram of how the extents will look though this may not be possible we still
* want to make sure everything acts normally (the last number is not inclusive)
*
- * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
- * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
+ * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291]
+ * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split]
*
- * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
- * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
+ * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ]
+ * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written]
*
- * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
- * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
+ * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635]
+ * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1]
*
- * [81920-86016]
- * [ regular ]
+ * [69635-73731][ 73731 - 86019 ][86019-90115]
+ * [ regular ][ hole but no extent][ regular ]
*/
-static void setup_file_extents(struct btrfs_root *root)
+static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
{
int slot = 0;
u64 disk_bytenr = SZ_1M;
@@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root)
insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
slot);
slot++;
- offset = 4096;
+ offset = sectorsize;
/* Now another hole */
insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
@@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root)
offset += 4;
/* Now for a regular extent */
- insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0,
+ disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- disk_bytenr += 4096;
- offset += 4095;
+ disk_bytenr += sectorsize;
+ offset += sectorsize - 1;
/*
* Now for 3 extents that were split from a hole punch so we test
* offsets properly.
*/
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
- 0, slot);
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, 4 * sectorsize,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 8192;
- disk_bytenr += 16384;
+ offset += 2 * sectorsize;
+ disk_bytenr += 4 * sectorsize;
/* Now for a unwritten prealloc extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 4096;
+ offset += sectorsize;
/*
* We want to jack up disk_bytenr a little more so the em stuff doesn't
* merge our records.
*/
- disk_bytenr += 8192;
+ disk_bytenr += 2 * sectorsize;
/*
* Now for a partially written prealloc extent, basically the same as
* the hole punch example above. Ram_bytes never changes when you mark
* extents written btw.
*/
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize,
+ disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, 4 * sectorsize,
BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 8192;
- disk_bytenr += 16384;
+ offset += 2 * sectorsize;
+ disk_bytenr += 4 * sectorsize;
/* Now a normal compressed extent */
- insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0,
+ disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG,
+ BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 8192;
+ offset += 2 * sectorsize;
/* No merges */
- disk_bytenr += 8192;
+ disk_bytenr += 2 * sectorsize;
/* Now a split compressed extent */
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG,
+ BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0,
+ disk_bytenr + sectorsize, sectorsize,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, sectorsize,
BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 8192;
- disk_bytenr += 8192;
+ offset += 2 * sectorsize;
+ disk_bytenr += 2 * sectorsize;
/* Now extents that have a hole but no hole extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 16384;
- disk_bytenr += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ offset += 4 * sectorsize;
+ disk_bytenr += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
}
static unsigned long prealloc_only = 0;
static unsigned long compressed_only = 0;
static unsigned long vacancy_only = 0;
-static noinline int test_btrfs_get_extent(void)
+static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void)
goto out;
}
- root->node = alloc_dummy_extent_buffer(NULL, 4096);
+ root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
@@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void)
/* First with no extents */
BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0);
if (IS_ERR(em)) {
em = NULL;
test_msg("Got an error when we shouldn't have\n");
@@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void)
* setup_file_extents, so if you change anything there you need to
* update the comment and update the expected values below.
*/
- setup_file_extents(root);
+ setup_file_extents(root, sectorsize);
em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
@@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected an inline, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4091) {
+
+ if (em->start != offset || em->len != (sectorsize - 5)) {
test_msg("Unexpected extent wanted start %llu len 1, got start "
"%llu len %llu\n", offset, em->start, em->len);
goto out;
@@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Regular extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4095) {
+ if (em->start != offset || em->len != sectorsize - 1) {
test_msg("Unexpected extent wanted start %llu len 4095, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
@@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* The next 3 are split extents */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Now for the compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Split compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void)
disk_bytenr, em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void)
* length of the actual hole, if this changes we'll have to change this
* test.
*/
- if (em->start != offset || em->len != 12288) {
- test_msg("Unexpected extent wanted start %llu len 12288, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 3 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 3 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
@@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -815,7 +838,7 @@ out:
return ret;
}
-static int test_hole_first(void)
+static int test_hole_first(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -832,7 +855,7 @@ static int test_hole_first(void)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -844,7 +867,7 @@ static int test_hole_first(void)
goto out;
}
- root->node = alloc_dummy_extent_buffer(NULL, 4096);
+ root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
@@ -861,9 +884,9 @@ static int test_hole_first(void)
* btrfs_get_extent.
*/
insert_inode_item_key(root);
- insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
- BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
+ insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
+ em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -872,9 +895,10 @@ static int test_hole_first(void)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
- if (em->start != 0 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 0 len 4096, got start "
- "%llu len %llu\n", em->start, em->len);
+ if (em->start != 0 || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start 0 len %u, "
+ "got start %llu len %llu\n",
+ sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
@@ -884,18 +908,19 @@ static int test_hole_first(void)
}
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
+ em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
- if (em->block_start != 4096) {
+ if (em->block_start != sectorsize) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != 4096 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 4096 len 4096, got "
- "start %llu len %llu\n", em->start, em->len);
+ if (em->start != sectorsize || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %u len %u, "
+ "got start %llu len %llu\n",
+ sectorsize, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -912,7 +937,7 @@ out:
return ret;
}
-static int test_extent_accounting(void)
+static int test_extent_accounting(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -924,7 +949,7 @@ static int test_extent_accounting(void)
return ret;
}
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -954,10 +979,11 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE][4k] */
+ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
- BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+ BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
+ NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -969,10 +995,10 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+ /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
NULL, GFP_KERNEL);
@@ -987,10 +1013,11 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE][4K] */
+ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ (BTRFS_MAX_EXTENT_SIZE >> 1)
+ + sectorsize - 1,
NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
@@ -1004,16 +1031,17 @@ static int test_extent_accounting(void)
}
/*
- * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+ * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*
* I'm artificially adding 2 to outstanding_extents because in the
* buffered IO case we'd add things up as we go, but I don't feel like
* doing that here, this isn't the interesting case we want to test.
*/
BTRFS_I(inode)->outstanding_extents += 2;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
- (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
- NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
+ (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
+ NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1025,10 +1053,13 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+ /*
+ * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
+ */
BTRFS_I(inode)->outstanding_extents++;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1042,8 +1073,8 @@ static int test_extent_accounting(void)
/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
@@ -1063,8 +1094,9 @@ static int test_extent_accounting(void)
* might fail and I'd rather satisfy my paranoia at this point.
*/
BTRFS_I(inode)->outstanding_extents++;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1103,7 +1135,7 @@ out:
return ret;
}
-int btrfs_test_inodes(void)
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
{
int ret;
@@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void)
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n");
- ret = test_btrfs_get_extent();
+ ret = test_btrfs_get_extent(sectorsize, nodesize);
if (ret)
return ret;
test_msg("Running hole first btrfs_get_extent test\n");
- ret = test_hole_first();
+ ret = test_hole_first(sectorsize, nodesize);
if (ret)
return ret;
test_msg("Running outstanding_extents tests\n");
- return test_extent_accounting();
+ return test_extent_accounting(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 8aa4ded31326..57a12c0d680b 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../transaction.h"
@@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
return ret;
}
-static int test_no_shared_qgroup(struct btrfs_root *root)
+static int test_no_shared_qgroup(struct btrfs_root *root,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -227,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
btrfs_init_dummy_trans(&trans);
test_msg("Qgroup basic add\n");
- ret = btrfs_create_qgroup(NULL, fs_info, 5);
+ ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
@@ -238,18 +240,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
* we can only call btrfs_qgroup_account_extent() directly to test
* quota.
*/
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
old_roots = NULL;
new_roots = NULL;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = remove_extent_item(root, 4096, 4096);
+ ret = remove_extent_item(root, nodesize, nodesize);
if (ret)
return -EINVAL;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
@@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
* right, also remove one of the roots and make sure the exclusive count is
* adjusted properly.
*/
-static int test_multiple_refs(struct btrfs_root *root)
+static int test_multiple_refs(struct btrfs_root *root,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root)
test_msg("Qgroup multiple refs test\n");
- /* We have 5 created already from the previous test */
- ret = btrfs_create_qgroup(NULL, fs_info, 256);
+ /*
+ * We have BTRFS_FS_TREE_OBJECTID created already from the
+ * previous test.
+ */
+ ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = add_tree_ref(root, 4096, 4096, 0, 256);
+ ret = add_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+ nodesize, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = remove_extent_ref(root, 4096, 4096, 0, 256);
+ ret = remove_extent_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+ 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
@@ -435,13 +451,13 @@ static int test_multiple_refs(struct btrfs_root *root)
return 0;
}
-int btrfs_test_qgroups(void)
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
struct btrfs_root *tmp_root;
int ret = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
return PTR_ERR(root);
@@ -468,7 +484,8 @@ int btrfs_test_qgroups(void)
* Can't use bytenr 0, some things freak out
* *cough*backref walking code*cough*
*/
- root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+ root->node = alloc_test_extent_buffer(root->fs_info, nodesize,
+ nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
@@ -476,16 +493,16 @@ int btrfs_test_qgroups(void)
}
btrfs_set_header_level(root->node, 0);
btrfs_set_header_nritems(root->node, 0);
- root->alloc_bytenr += 8192;
+ root->alloc_bytenr += 2 * nodesize;
- tmp_root = btrfs_alloc_dummy_root();
+ tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
- tmp_root->root_key.objectid = 5;
+ tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID;
root->fs_info->fs_root = tmp_root;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
@@ -493,14 +510,14 @@ int btrfs_test_qgroups(void)
goto out;
}
- tmp_root = btrfs_alloc_dummy_root();
+ tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
- tmp_root->root_key.objectid = 256;
+ tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
test_msg("Couldn't insert fs root %d\n", ret);
@@ -508,10 +525,10 @@ int btrfs_test_qgroups(void)
}
test_msg("Running qgroup tests\n");
- ret = test_no_shared_qgroup(root);
+ ret = test_no_shared_qgroup(root, sectorsize, nodesize);
if (ret)
goto out;
- ret = test_multiple_refs(root);
+ ret = test_multiple_refs(root, sectorsize, nodesize);
out:
btrfs_free_dummy_root(root);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f6e24cb423ae..948aa186b353 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
+ u64 transid = trans->transid;
unsigned long cur = trans->delayed_ref_updates;
int lock = (trans->type != TRANS_JOIN_NOLOCK);
int err = 0;
@@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (must_run_delayed_refs) {
- btrfs_async_run_delayed_refs(root, cur,
+ btrfs_async_run_delayed_refs(root, cur, transid,
must_run_delayed_refs == 1);
}
return err;
@@ -1311,11 +1312,6 @@ int btrfs_defrag_root(struct btrfs_root *root)
return ret;
}
-/* Bisesctability fixup, remove in 4.8 */
-#ifndef btrfs_std_error
-#define btrfs_std_error btrfs_handle_fs_error
-#endif
-
/*
* Do all special snapshot related qgroup dirty hack.
*
@@ -1385,7 +1381,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
switch_commit_roots(trans->transaction, fs_info);
ret = btrfs_write_and_wait_transaction(trans, src);
if (ret)
- btrfs_std_error(fs_info, ret,
+ btrfs_handle_fs_error(fs_info, ret,
"Error while writing out transaction for qgroup");
out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9fe0ec2bf0fe..c5abee4f01ad 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
- unsigned long blocks_used;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
+ bool dirty;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b7665af471d8..c05f69a8ec42 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr);
- if (!next)
- return -ENOMEM;
+ if (IS_ERR(next))
+ return PTR_ERR(next);
if (*level == 1) {
ret = wc->process_func(root, next, wc, ptr_gen);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bdc62561ede8..589f128173b1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
u64 dev_extent_len = 0;
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
int i, ret = 0;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
/* Just in case */
root = root->fs_info->chunk_root;
@@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
@@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
if (map->stripes[i].dev) {
ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
}
}
+ mutex_unlock(&fs_devices->device_list_mutex);
+
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -4230,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -4682,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_RAID5) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 1;
}
if (type & BTRFS_BLOCK_GROUP_RAID6) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 2;
}
@@ -5762,20 +5774,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
if (found) {
- if (physical_of_found + map->stripe_len <=
- dev_replace->cursor_left) {
- struct btrfs_bio_stripe *tgtdev_stripe =
- bbio->stripes + num_stripes;
+ struct btrfs_bio_stripe *tgtdev_stripe =
+ bbio->stripes + num_stripes;
- tgtdev_stripe->physical = physical_of_found;
- tgtdev_stripe->length =
- bbio->stripes[index_srcdev].length;
- tgtdev_stripe->dev = dev_replace->tgtdev;
- bbio->tgtdev_map[index_srcdev] = num_stripes;
+ tgtdev_stripe->physical = physical_of_found;
+ tgtdev_stripe->length =
+ bbio->stripes[index_srcdev].length;
+ tgtdev_stripe->dev = dev_replace->tgtdev;
+ bbio->tgtdev_map[index_srcdev] = num_stripes;
- tgtdev_indexes++;
- num_stripes++;
- }
+ tgtdev_indexes++;
+ num_stripes++;
}
}
@@ -6250,27 +6259,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- struct map_lookup *map;
- struct extent_map *em;
- u64 logical;
u64 length;
u64 stripe_len;
- u64 devid;
- u8 uuid[BTRFS_UUID_SIZE];
- int num_stripes;
- int ret;
- int i;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
- logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- /* Validation check */
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
if (!num_stripes) {
btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
num_stripes);
@@ -6281,6 +6286,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
"invalid chunk logical %llu", logical);
return -EIO;
}
+ if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+ btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+ btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
if (!length || !IS_ALIGNED(length, root->sectorsize)) {
btrfs_err(root->fs_info,
"invalid chunk length %llu", length);
@@ -6292,13 +6302,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
return -EIO;
}
if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
- btrfs_chunk_type(leaf, chunk)) {
+ type) {
btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
~(BTRFS_BLOCK_GROUP_TYPE_MASK |
BTRFS_BLOCK_GROUP_PROFILE_MASK) &
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ btrfs_err(root->fs_info,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk)
+{
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ struct map_lookup *map;
+ struct extent_map *em;
+ u64 logical;
+ u64 length;
+ u64 stripe_len;
+ u64 devid;
+ u8 uuid[BTRFS_UUID_SIZE];
+ int num_stripes;
+ int ret;
+ int i;
+
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+ ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+ if (ret)
+ return ret;
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6546,6 +6597,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 array_size;
u32 len = 0;
u32 cur_offset;
+ u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6555,8 +6607,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
* overallocate but we can keep it as-is, only the first page is used.
*/
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
- if (!sb)
- return -ENOMEM;
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
@@ -6612,6 +6664,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
break;
}
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6630,12 +6691,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb_array_offset += len;
cur_offset += len;
}
+ clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return ret;
out_short_read:
printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
len, cur_offset);
+ clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return -EIO;
}
@@ -6648,6 +6711,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
struct btrfs_key found_key;
int ret;
int slot;
+ u64 total_dev = 0;
root = root->fs_info->chunk_root;
@@ -6689,6 +6753,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
ret = read_one_dev(root, leaf, dev_item);
if (ret)
goto error;
+ total_dev++;
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6698,6 +6763,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
}
path->slots[0]++;
}
+
+ /*
+ * After loading chunk tree, we've got all device information,
+ * do another round of validation checks.
+ */
+ if (total_dev != root->fs_info->fs_devices->total_devices) {
+ btrfs_err(root->fs_info,
+ "super_num_devices %llu mismatch with num_devices %llu found here",
+ btrfs_super_num_devices(root->fs_info->super_copy),
+ total_dev);
+ ret = -EINVAL;
+ goto error;
+ }
+ if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+ root->fs_info->fs_devices->total_rw_bytes) {
+ btrfs_err(root->fs_info,
+ "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+ btrfs_super_total_bytes(root->fs_info->super_copy),
+ root->fs_info->fs_devices->total_rw_bytes);
+ ret = -EINVAL;
+ goto error;
+ }
ret = 0;
error:
unlock_chunks(root);
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 861d611b8c05..ce5f345d70f5 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -380,7 +380,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
* check if the backing cache is updated to FS-Cache
* - called by FS-Cache when evaluates if need to invalidate the cache
*/
-static bool cachefiles_check_consistency(struct fscache_operation *op)
+static int cachefiles_check_consistency(struct fscache_operation *op)
{
struct cachefiles_object *object;
struct cachefiles_cache *cache;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index eeb71e5de27a..26a9d10d75e9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -276,8 +276,10 @@ static void finish_read(struct ceph_osd_request *req)
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
- if (rc < 0 && rc != -ENOENT)
+ if (rc < 0 && rc != -ENOENT) {
+ ceph_fscache_readpage_cancel(inode, page);
goto unlock;
+ }
if (bytes < (int)PAGE_SIZE) {
/* zero (remainder of) page */
int s = bytes < 0 ? 0 : bytes;
@@ -535,8 +537,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
- ceph_readpage_to_fscache(inode, page);
-
set_page_writeback(page);
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
&ci->i_layout, snapc,
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index c052b5bf219b..238c55b01723 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -25,6 +25,7 @@
#include "cache.h"
struct ceph_aux_inode {
+ u64 version;
struct timespec mtime;
loff_t size;
};
@@ -69,15 +70,8 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
fsc, true);
-
- if (fsc->fscache == NULL) {
+ if (!fsc->fscache)
pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
- return 0;
- }
-
- fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
- if (fsc->revalidate_wq == NULL)
- return -ENOMEM;
return 0;
}
@@ -105,6 +99,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
const struct inode* inode = &ci->vfs_inode;
memset(&aux, 0, sizeof(aux));
+ aux.version = ci->i_version;
aux.mtime = inode->i_mtime;
aux.size = i_size_read(inode);
@@ -131,6 +126,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
return FSCACHE_CHECKAUX_OBSOLETE;
memset(&aux, 0, sizeof(aux));
+ aux.version = ci->i_version;
aux.mtime = inode->i_mtime;
aux.size = i_size_read(inode);
@@ -181,32 +177,26 @@ static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
.now_uncached = ceph_fscache_inode_now_uncached,
};
-void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
- struct ceph_inode_info* ci)
+void ceph_fscache_register_inode_cookie(struct inode *inode)
{
- struct inode* inode = &ci->vfs_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
/* No caching for filesystem */
if (fsc->fscache == NULL)
return;
/* Only cache for regular files that are read only */
- if ((ci->vfs_inode.i_mode & S_IFREG) == 0)
+ if (!S_ISREG(inode->i_mode))
return;
- /* Avoid multiple racing open requests */
- inode_lock(inode);
-
- if (ci->fscache)
- goto done;
-
- ci->fscache = fscache_acquire_cookie(fsc->fscache,
- &ceph_fscache_inode_object_def,
- ci, true);
- fscache_check_consistency(ci->fscache);
-done:
+ inode_lock_nested(inode, I_MUTEX_CHILD);
+ if (!ci->fscache) {
+ ci->fscache = fscache_acquire_cookie(fsc->fscache,
+ &ceph_fscache_inode_object_def,
+ ci, false);
+ }
inode_unlock(inode);
-
}
void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
@@ -222,6 +212,34 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
fscache_relinquish_cookie(cookie, 0);
}
+static bool ceph_fscache_can_enable(void *data)
+{
+ struct inode *inode = data;
+ return !inode_is_open_for_write(inode);
+}
+
+void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!fscache_cookie_valid(ci->fscache))
+ return;
+
+ if (inode_is_open_for_write(inode)) {
+ dout("fscache_file_set_cookie %p %p disabling cache\n",
+ inode, filp);
+ fscache_disable_cookie(ci->fscache, false);
+ fscache_uncache_all_inode_pages(ci->fscache, inode);
+ } else {
+ fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
+ inode);
+ if (fscache_cookie_enabled(ci->fscache)) {
+ dout("fscache_file_set_cookie %p %p enabing cache\n",
+ inode, filp);
+ }
+ }
+}
+
static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
{
if (!error)
@@ -238,8 +256,7 @@ static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int
static inline bool cache_valid(struct ceph_inode_info *ci)
{
- return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) &&
- (ci->i_fscache_gen == ci->i_rdcache_gen));
+ return ci->i_fscache_gen == ci->i_rdcache_gen;
}
@@ -332,69 +349,27 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
- if (fsc->revalidate_wq)
- destroy_workqueue(fsc->revalidate_wq);
-
fscache_relinquish_cookie(fsc->fscache, 0);
fsc->fscache = NULL;
}
-static void ceph_revalidate_work(struct work_struct *work)
-{
- int issued;
- u32 orig_gen;
- struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
- i_revalidate_work);
- struct inode *inode = &ci->vfs_inode;
-
- spin_lock(&ci->i_ceph_lock);
- issued = __ceph_caps_issued(ci, NULL);
- orig_gen = ci->i_rdcache_gen;
- spin_unlock(&ci->i_ceph_lock);
-
- if (!(issued & CEPH_CAP_FILE_CACHE)) {
- dout("revalidate_work lost cache before validation %p\n",
- inode);
- goto out;
- }
-
- if (!fscache_check_consistency(ci->fscache))
- fscache_invalidate(ci->fscache);
-
- spin_lock(&ci->i_ceph_lock);
- /* Update the new valid generation (backwards sanity check too) */
- if (orig_gen > ci->i_fscache_gen) {
- ci->i_fscache_gen = orig_gen;
- }
- spin_unlock(&ci->i_ceph_lock);
-
-out:
- iput(&ci->vfs_inode);
-}
-
-void ceph_queue_revalidate(struct inode *inode)
+/*
+ * caller should hold CEPH_CAP_FILE_{RD,CACHE}
+ */
+void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
- struct ceph_inode_info *ci = ceph_inode(inode);
-
- if (fsc->revalidate_wq == NULL || ci->fscache == NULL)
+ if (cache_valid(ci))
return;
- ihold(inode);
-
- if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq,
- &ci->i_revalidate_work)) {
- dout("ceph_queue_revalidate %p\n", inode);
- } else {
- dout("ceph_queue_revalidate %p failed\n)", inode);
- iput(inode);
+ /* reuse i_truncate_mutex. There should be no pending
+ * truncate while the caller holds CEPH_CAP_FILE_RD */
+ mutex_lock(&ci->i_truncate_mutex);
+ if (!cache_valid(ci)) {
+ if (fscache_check_consistency(ci->fscache))
+ fscache_invalidate(ci->fscache);
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_fscache_gen = ci->i_rdcache_gen;
+ spin_unlock(&ci->i_ceph_lock);
}
-}
-
-void ceph_fscache_inode_init(struct ceph_inode_info *ci)
-{
- ci->fscache = NULL;
- /* The first load is verifed cookie open time */
- ci->i_fscache_gen = 1;
- INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
+ mutex_unlock(&ci->i_truncate_mutex);
}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 5ac591bd012b..7e72c7594f0c 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -34,10 +34,10 @@ void ceph_fscache_unregister(void);
int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
-void ceph_fscache_inode_init(struct ceph_inode_info *ci);
-void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
- struct ceph_inode_info* ci);
+void ceph_fscache_register_inode_cookie(struct inode *inode);
void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
+void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
+void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
int ceph_readpages_from_fscache(struct inode *inode,
@@ -46,12 +46,11 @@ int ceph_readpages_from_fscache(struct inode *inode,
unsigned *nr_pages);
void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
-void ceph_queue_revalidate(struct inode *inode);
-static inline void ceph_fscache_update_objectsize(struct inode *inode)
+static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- fscache_attr_changed(ci->fscache);
+ ci->fscache = NULL;
+ ci->i_fscache_gen = 0;
}
static inline void ceph_fscache_invalidate(struct inode *inode)
@@ -88,6 +87,11 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode,
return fscache_readpages_cancel(ci->fscache, pages);
}
+static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
+{
+ ci->i_fscache_gen = ci->i_rdcache_gen - 1;
+}
+
#else
static inline int ceph_fscache_register(void)
@@ -112,8 +116,20 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
}
-static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
- struct ceph_inode_info* ci)
+static inline void ceph_fscache_register_inode_cookie(struct inode *inode)
+{
+}
+
+static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+}
+
+static inline void ceph_fscache_file_set_cookie(struct inode *inode,
+ struct file *filp)
+{
+}
+
+static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
}
@@ -141,10 +157,6 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
{
}
-static inline void ceph_fscache_update_objectsize(struct inode *inode)
-{
-}
-
static inline void ceph_fscache_invalidate(struct inode *inode)
{
}
@@ -154,10 +166,6 @@ static inline void ceph_invalidate_fscache_page(struct inode *inode,
{
}
-static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
-{
-}
-
static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
{
return 1;
@@ -173,7 +181,7 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode,
{
}
-static inline void ceph_queue_revalidate(struct inode *inode)
+static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
{
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c17b5d76d75e..6f60d0a3d0f9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2393,6 +2393,9 @@ again:
snap_rwsem_locked = true;
}
*got = need | (have & want);
+ if ((need & CEPH_CAP_FILE_RD) &&
+ !(*got & CEPH_CAP_FILE_CACHE))
+ ceph_disable_fscache_readpage(ci);
__take_cap_refs(ci, *got, true);
ret = 1;
}
@@ -2554,6 +2557,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
break;
}
+ if ((_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
+ ceph_fscache_revalidate_cookie(ci);
+
*got = _got;
return 0;
}
@@ -2795,7 +2801,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool writeback = false;
bool queue_trunc = false;
bool queue_invalidate = false;
- bool queue_revalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
@@ -2837,8 +2842,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
ci->i_rdcache_revoking = ci->i_rdcache_gen;
}
}
-
- ceph_fscache_invalidate(inode);
}
/* side effects now are allowed */
@@ -2880,11 +2883,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
}
- /* Do we need to revalidate our fscache cookie. Don't bother on the
- * first cache cap as we already validate at cookie creation time. */
- if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
- queue_revalidate = true;
-
if (newcaps & CEPH_CAP_ANY_RD) {
/* ctime/mtime/atime? */
ceph_decode_timespec(&mtime, &grant->mtime);
@@ -2993,11 +2991,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
if (fill_inline)
ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
- if (queue_trunc) {
+ if (queue_trunc)
ceph_queue_vmtruncate(inode);
- ceph_queue_revalidate(inode);
- } else if (queue_revalidate)
- ceph_queue_revalidate(inode);
if (writeback)
/*
@@ -3199,10 +3194,8 @@ static void handle_cap_trunc(struct inode *inode,
truncate_seq, truncate_size, size);
spin_unlock(&ci->i_ceph_lock);
- if (queue_trunc) {
+ if (queue_trunc)
ceph_queue_vmtruncate(inode);
- ceph_fscache_invalidate(inode);
- }
}
/*
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 6e72c98162d5..1780218a48f0 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
}
dentry = d_obtain_alias(inode);
- if (IS_ERR(dentry)) {
- iput(inode);
+ if (IS_ERR(dentry))
return dentry;
- }
err = ceph_init_dentry(dentry);
if (err < 0) {
dput(dentry);
@@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb,
return ERR_PTR(-ENOENT);
dentry = d_obtain_alias(inode);
- if (IS_ERR(dentry)) {
- iput(inode);
+ if (IS_ERR(dentry))
return dentry;
- }
err = ceph_init_dentry(dentry);
if (err < 0) {
dput(dentry);
@@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
dout("fh_to_parent %llx\n", cfh->parent_ino);
dentry = __get_parent(sb, NULL, cfh->ino);
- if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+ if (unlikely(dentry == ERR_PTR(-ENOENT)))
dentry = __fh_to_dentry(sb, cfh->parent_ino);
return dentry;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a888df6f2d71..0daaf7ceedc5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -137,23 +137,11 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
{
struct ceph_file_info *cf;
int ret = 0;
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
- struct ceph_mds_client *mdsc = fsc->mdsc;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
- /* First file open request creates the cookie, we want to keep
- * this cookie around for the filetime of the inode as not to
- * have to worry about fscache register / revoke / operation
- * races.
- *
- * Also, if we know the operation is going to invalidate data
- * (non readonly) just nuke the cache right away.
- */
- ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
- if ((fmode & CEPH_FILE_MODE_WR))
- ceph_fscache_invalidate(inode);
+ ceph_fscache_register_inode_cookie(inode);
+ ceph_fscache_file_set_cookie(inode, file);
case S_IFDIR:
dout("init_file %p %p 0%o (regular)\n", inode, file,
inode->i_mode);
@@ -406,7 +394,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
dn = ceph_finish_lookup(req, dentry, err);
if (IS_ERR(dn))
err = PTR_ERR(dn);
@@ -1349,7 +1337,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
retry_snap:
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1407,7 +1395,6 @@ retry_snap:
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
} else {
- loff_t old_size = i_size_read(inode);
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -1418,8 +1405,6 @@ retry_snap:
written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
- if (i_size_read(inode) > old_size)
- ceph_fscache_update_objectsize(inode);
inode_unlock(inode);
}
@@ -1440,7 +1425,7 @@ retry_snap:
ceph_put_cap_refs(ci, got);
if (written >= 0) {
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
@@ -1672,8 +1657,8 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
- !(mode & FALLOC_FL_PUNCH_HOLE)) {
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
+ !(mode & FALLOC_FL_PUNCH_HOLE)) {
ret = -ENOSPC;
goto unlock;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 0130a8592191..0168b49fb6ad 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -103,7 +103,6 @@ struct ceph_fs_client {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- struct workqueue_struct *revalidate_wq;
#endif
};
@@ -360,8 +359,7 @@ struct ceph_inode_info {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- u32 i_fscache_gen; /* sequence, for delayed fscache validate */
- struct work_struct i_revalidate_work;
+ u32 i_fscache_gen;
#endif
struct inode vfs_inode; /* at end */
};
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 5a53ac6b1e02..02b071bf3732 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target)
case SFM_SLASH:
*target = '\\';
break;
+ case SFM_SPACE:
+ *target = ' ';
+ break;
+ case SFM_PERIOD:
+ *target = '.';
+ break;
default:
return false;
}
@@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char)
return dest_char;
}
-static __le16 convert_to_sfm_char(char src_char)
+static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
{
__le16 dest_char;
@@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char)
case '|':
dest_char = cpu_to_le16(SFM_PIPE);
break;
+ case '.':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_PERIOD);
+ else
+ dest_char = 0;
+ break;
+ case ' ':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_SPACE);
+ else
+ dest_char = 0;
+ break;
default:
dest_char = 0;
}
@@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
/* see if we must remap this char */
if (map_chars == SFU_MAP_UNI_RSVD)
dst_char = convert_to_sfu_char(src_char);
- else if (map_chars == SFM_MAP_UNI_RSVD)
- dst_char = convert_to_sfm_char(src_char);
- else
+ else if (map_chars == SFM_MAP_UNI_RSVD) {
+ bool end_of_string;
+
+ if (i == srclen - 1)
+ end_of_string = true;
+ else
+ end_of_string = false;
+
+ dst_char = convert_to_sfm_char(src_char, end_of_string);
+ } else
dst_char = 0;
/*
* FIXME: We can not handle remapping backslash (UNI_SLASH)
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index bdc52cb9a676..479bc0a941f3 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -64,6 +64,8 @@
#define SFM_LESSTHAN ((__u16) 0xF023)
#define SFM_PIPE ((__u16) 0xF027)
#define SFM_SLASH ((__u16) 0xF026)
+#define SFM_PERIOD ((__u16) 0xF028)
+#define SFM_SPACE ((__u16) 0xF029)
/*
* Mapping mechanism to use when one of the seven reserved characters is
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5d8b7edf8a8f..5d841f39c4b7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
struct workqueue_struct *cifsiod_wq;
+__u32 cifs_lock_secret;
/*
* Bumps refcount for cifs super block.
@@ -1266,6 +1267,8 @@ init_cifs(void)
spin_lock_init(&cifs_file_list_lock);
spin_lock_init(&GlobalMid_Lock);
+ get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+
if (cifs_max_pending < 2) {
cifs_max_pending = 2;
cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bba106cdc43c..8f1d8c1e72be 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
+extern __u32 cifs_lock_secret;
extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 66736f57b5ab..7d2b15c06090 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work)
* server->ops->need_neg() == true. Also, no need to ping if
* we got a response recently.
*/
- if (!server->ops->need_neg || server->ops->need_neg(server) ||
+
+ if (server->tcpStatus == CifsNeedReconnect ||
+ server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index c3eb998a99bd..fb0903fffc22 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -445,7 +445,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
* Check for hashed negative dentry. We have already revalidated
* the dentry and it is fine. No need to perform another lookup.
*/
- if (!d_unhashed(direntry))
+ if (!d_in_lookup(direntry))
return -ENOENT;
res = cifs_lookup(inode, direntry, 0);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9793ae0bcaa2..d4890b6dc22d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
return rc;
}
+static __u32
+hash_lockowner(fl_owner_t owner)
+{
+ return cifs_lock_secret ^ hash32_ptr((const void *)owner);
+}
+
struct lock_to_push {
struct list_head llist;
__u64 offset;
@@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
else
type = CIFS_WRLCK;
lck = list_entry(el, struct lock_to_push, llist);
- lck->pid = flock->fl_pid;
+ lck->pid = hash_lockowner(flock->fl_owner);
lck->netfid = cfile->fid.netfid;
lck->length = length;
lck->type = type;
@@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
posix_lock_type = CIFS_RDLCK;
else
posix_lock_type = CIFS_WRLCK;
- rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
+ rc = CIFSSMBPosixLock(xid, tcon, netfid,
+ hash_lockowner(flock->fl_owner),
flock->fl_start, length, flock,
posix_lock_type, wait_flag);
return rc;
@@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
posix_lock_type = CIFS_UNLCK;
rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
- current->tgid, flock->fl_start, length,
+ hash_lockowner(flock->fl_owner),
+ flock->fl_start, length,
NULL, posix_lock_type, wait_flag);
goto out;
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 848249fa120f..3079b38f0afb 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
+int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af0ec2d5ad0e..538d9b55699a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
sec_blob->DomainName.MaximumLength = 0;
}
-/* We do not malloc the blob, it is passed in pbuffer, because its
- maximum possible size is fixed and small, making this approach cleaner.
- This function returns the length of the data in the blob */
-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+static int size_of_ntlmssp_blob(struct cifs_ses *ses)
+{
+ int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
+ - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+ if (ses->domainName)
+ sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ else
+ sz += 2;
+
+ if (ses->user_name)
+ sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+ else
+ sz += 2;
+
+ return sz;
+}
+
+int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
int rc;
- AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
+ AUTHENTICATE_MESSAGE *sec_blob;
__u32 flags;
unsigned char *tmp;
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+ *buflen = 0;
+ goto setup_ntlmv2_ret;
+ }
+ *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+ sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
+
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
}
- tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+ tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
sec_blob->LmChallengeResponse.BufferOffset =
@@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.Length = 0;
sec_blob->LmChallengeResponse.MaximumLength = 0;
- sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->NtChallengeResponse.BufferOffset =
+ cpu_to_le32(tmp - *pbuffer);
if (ses->user_name != NULL) {
- rc = setup_ntlmv2_rsp(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
- goto setup_ntlmv2_ret;
- }
memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
ses->auth_key.len - CIFS_SESS_KEY_SIZE);
tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
@@ -423,23 +443,23 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
}
if (ses->domainName == NULL) {
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = 0;
sec_blob->DomainName.MaximumLength = 0;
tmp += 2;
} else {
int len;
len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
- CIFS_MAX_USERNAME_LEN, nls_cp);
+ CIFS_MAX_DOMAINNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = cpu_to_le16(len);
sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
if (ses->user_name == NULL) {
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = 0;
sec_blob->UserName.MaximumLength = 0;
tmp += 2;
@@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
CIFS_MAX_USERNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = cpu_to_le16(len);
sec_blob->UserName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
- sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->WorkstationName.Length = 0;
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
@@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
&& !calc_seckey(ses)) {
memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
sec_blob->SessionKey.MaximumLength =
cpu_to_le16(CIFS_CPHTXT_SIZE);
tmp += CIFS_CPHTXT_SIZE;
} else {
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = 0;
sec_blob->SessionKey.MaximumLength = 0;
}
+ *buflen = tmp - *pbuffer;
setup_ntlmv2_ret:
- *buflen = tmp - pbuffer;
return rc;
}
@@ -690,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data)
rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
+ if (rc)
+ goto out;
memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
bcc_ptr += CIFS_AUTH_RESP_SIZE;
@@ -1266,7 +1288,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
__u16 bytes_remaining;
char *bcc_ptr;
- char *ntlmsspblob = NULL;
+ unsigned char *ntlmsspblob = NULL;
u16 blob_len;
cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
@@ -1279,19 +1301,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
/* Build security blob before we assemble the request */
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
smb_buf = (struct smb_hdr *)pSMB;
- /*
- * 5 is an empirical value, large enough to hold
- * authenticate message plus max 10 of av paris,
- * domain, user, workstation names, flags, etc.
- */
- ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = build_ntlmssp_auth_blob(ntlmsspblob,
+ rc = build_ntlmssp_auth_blob(&ntlmsspblob,
&blob_len, ses, sess_data->nls_cp);
if (rc)
goto out_free_ntlmsspblob;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f38e33d365b..29e06db5f187 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
u16 blob_length = 0;
struct key *spnego_key = NULL;
char *security_blob = NULL;
- char *ntlmssp_blob = NULL;
+ unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
cifs_dbg(FYI, "Session Setup\n");
@@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate:
iov[1].iov_len = blob_length;
} else if (phase == NtLmAuthenticate) {
req->hdr.SessionId = ses->Suid;
- ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
- GFP_KERNEL);
- if (ntlmssp_blob == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
nls_cp);
if (rc) {
cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
@@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server)
cifs_dbg(FYI, "In echo request\n");
+ if (server->tcpStatus == CifsNeedNegotiate) {
+ struct list_head *tmp, *tmp2;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ list_for_each(tmp2, &ses->tcon_list) {
+ tcon = list_entry(tmp2, struct cifs_tcon,
+ tcon_list);
+ /* add check for persistent handle reconnect */
+ if (tcon && tcon->need_reconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ rc = smb2_reconnect(SMB2_ECHO, tcon);
+ spin_lock(&cifs_tcp_ses_lock);
+ }
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+
+ /* if no session, renegotiate failed above */
+ if (server->tcpStatus == CifsNeedNegotiate)
+ return -EIO;
+
rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
if (rc)
return rc;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 33b7ee34eda5..bbc1252a59f5 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
len = simple_write_to_buffer(buffer->bin_buffer,
buffer->bin_buffer_size, ppos, buf, count);
- if (len > 0)
- *ppos += len;
out:
mutex_unlock(&buffer->mutex);
return len;
diff --git a/fs/coredump.c b/fs/coredump.c
index 38a7ab87e10a..281b768000e6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
return 0;
file->f_pos = pos;
cprm->written += n;
+ cprm->pos += n;
nr -= n;
}
return 1;
@@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
if (dump_interrupted() ||
file->f_op->llseek(file, nr, SEEK_CUR) < 0)
return 0;
+ cprm->pos += nr;
return 1;
} else {
while (nr > PAGE_SIZE) {
@@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip);
int dump_align(struct coredump_params *cprm, int align)
{
- unsigned mod = cprm->file->f_pos & (align - 1);
+ unsigned mod = cprm->pos & (align - 1);
if (align & (align - 1))
return 0;
return mod ? dump_skip(cprm, align - mod) : 1;
diff --git a/fs/dax.c b/fs/dax.c
index 761495bf5eb9..e207f8f9b700 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
dax.addr += first;
size = map_len - first;
}
- max = min(pos + size, end);
+ /*
+ * pos + size is one past the last offset for IO,
+ * so pos + size can overflow loff_t at extreme offsets.
+ * Cast to u64 to catch this and get the true minimum.
+ */
+ max = min_t(u64, pos + size, end);
}
if (iov_iter_rw(iter) == WRITE) {
diff --git a/fs/dcache.c b/fs/dcache.c
index ad4a542e9bab..d6847d7b123d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(d_drop);
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+ struct dentry *next;
+ /*
+ * Inform d_walk() and shrink_dentry_list() that we are no longer
+ * attached to the dentry tree
+ */
+ dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ if (unlikely(list_empty(&dentry->d_child)))
+ return;
+ __list_del_entry(&dentry->d_child);
+ /*
+ * Cursors can move around the list of children. While we'd been
+ * a normal list member, it didn't matter - ->d_child.next would've
+ * been updated. However, from now on it won't be and for the
+ * things like d_walk() it might end up with a nasty surprise.
+ * Normally d_walk() doesn't care about cursors moving around -
+ * ->d_lock on parent prevents that and since a cursor has no children
+ * of its own, we get through it without ever unlocking the parent.
+ * There is one exception, though - if we ascend from a child that
+ * gets killed as soon as we unlock it, the next sibling is found
+ * using the value left in its ->d_child.next. And if _that_
+ * pointed to a cursor, and cursor got moved (e.g. by lseek())
+ * before d_walk() regains parent->d_lock, we'll end up skipping
+ * everything the cursor had been moved past.
+ *
+ * Solution: make sure that the pointer left behind in ->d_child.next
+ * points to something that won't be moving around. I.e. skip the
+ * cursors.
+ */
+ while (dentry->d_child.next != &parent->d_subdirs) {
+ next = list_entry(dentry->d_child.next, struct dentry, d_child);
+ if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+ break;
+ dentry->d_child.next = next->d_child.next;
+ }
+}
+
static void __dentry_kill(struct dentry *dentry)
{
struct dentry *parent = NULL;
@@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry)
}
/* if it was on the hash then remove it */
__d_drop(dentry);
- __list_del_entry(&dentry->d_child);
- /*
- * Inform d_walk() that we are no longer attached to the
- * dentry tree
- */
- dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ dentry_unlist(dentry, parent);
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1203,6 +1236,9 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+ continue;
+
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
ret = enter(data, dentry);
@@ -1636,7 +1672,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
struct dentry *dentry = __d_alloc(parent->d_sb, name);
if (!dentry)
return NULL;
-
+ dentry->d_flags |= DCACHE_RCUACCESS;
spin_lock(&parent->d_lock);
/*
* don't need child lock because it is not subject
@@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+ struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+ if (dentry) {
+ dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+ dentry->d_parent = dget(parent);
+ }
+ return dentry;
+}
+
/**
* d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
* @sb: the superblock
@@ -2358,7 +2404,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
BUG_ON(!d_unhashed(entry));
hlist_bl_lock(b);
- entry->d_flags |= DCACHE_RCUACCESS;
hlist_bl_add_head_rcu(&entry->d_hash, b);
hlist_bl_unlock(b);
}
@@ -2458,7 +2503,6 @@ retry:
rcu_read_unlock();
goto retry;
}
- rcu_read_unlock();
/*
* No changes for the parent since the beginning of d_lookup().
* Since all removals from the chain happen with hlist_bl_lock(),
@@ -2471,8 +2515,6 @@ retry:
continue;
if (dentry->d_parent != parent)
continue;
- if (d_unhashed(dentry))
- continue;
if (parent->d_flags & DCACHE_OP_COMPARE) {
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
@@ -2484,9 +2526,18 @@ retry:
if (dentry_cmp(dentry, str, len))
continue;
}
- dget(dentry);
hlist_bl_unlock(b);
- /* somebody is doing lookup for it right now; wait for it */
+ /* now we can try to grab a reference */
+ if (!lockref_get_not_dead(&dentry->d_lockref)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+
+ rcu_read_unlock();
+ /*
+ * somebody is likely to be still doing lookup for it;
+ * wait for them to finish
+ */
spin_lock(&dentry->d_lock);
d_wait_lookup(dentry);
/*
@@ -2517,6 +2568,7 @@ retry:
dput(new);
return dentry;
}
+ rcu_read_unlock();
/* we can't take ->d_lock here; it's OK, though. */
new->d_flags |= DCACHE_PAR_LOOKUP;
new->d_wait = wq;
@@ -2843,6 +2895,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
/* ... and switch them in the tree */
if (IS_ROOT(dentry)) {
/* splicing a tree */
+ dentry->d_flags |= DCACHE_RCUACCESS;
dentry->d_parent = target->d_parent;
target->d_parent = target;
list_del_init(&target->d_child);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 9c1c9a01b7e5..592059f88e04 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
r = real_fops->open(inode, filp);
out:
- fops_put(real_fops);
debugfs_use_file_finish(srcu_idx);
return r;
}
@@ -262,8 +261,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
if (real_fops->open) {
r = real_fops->open(inode, filp);
-
- if (filp->f_op != proxy_fops) {
+ if (r) {
+ replace_fops(filp, d_inode(dentry)->i_fop);
+ goto free_proxy;
+ } else if (filp->f_op != proxy_fops) {
/* No protection against file removal anymore. */
WARN(1, "debugfs file owner replaced proxy fops: %pd",
dentry);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0b2954d7172d..37c134a132c7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = {
static DEFINE_MUTEX(allocated_ptys_lock);
-static struct vfsmount *devpts_mnt;
-
struct pts_mount_opts {
int setuid;
int setgid;
@@ -104,7 +102,7 @@ struct pts_mount_opts {
kgid_t gid;
umode_t mode;
umode_t ptmxmode;
- int newinstance;
+ int reserve;
int max;
};
@@ -117,11 +115,9 @@ static const match_table_t tokens = {
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_mode, "mode=%o"},
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
{Opt_ptmxmode, "ptmxmode=%o"},
{Opt_newinstance, "newinstance"},
{Opt_max, "max=%d"},
-#endif
{Opt_err, NULL}
};
@@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
return sb->s_fs_info;
}
-static inline struct super_block *pts_sb_from_inode(struct inode *inode)
+struct pts_fs_info *devpts_acquire(struct file *filp)
{
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
- if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
- return inode->i_sb;
-#endif
- if (!devpts_mnt)
- return NULL;
- return devpts_mnt->mnt_sb;
+ struct pts_fs_info *result;
+ struct path path;
+ struct super_block *sb;
+ int err;
+
+ path = filp->f_path;
+ path_get(&path);
+
+ /* Has the devpts filesystem already been found? */
+ sb = path.mnt->mnt_sb;
+ if (sb->s_magic != DEVPTS_SUPER_MAGIC) {
+ /* Is a devpts filesystem at "pts" in the same directory? */
+ err = path_pts(&path);
+ if (err) {
+ result = ERR_PTR(err);
+ goto out;
+ }
+
+ /* Is the path the root of a devpts filesystem? */
+ result = ERR_PTR(-ENODEV);
+ sb = path.mnt->mnt_sb;
+ if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
+ (path.mnt->mnt_root != sb->s_root))
+ goto out;
+ }
+
+ /*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+ atomic_inc(&sb->s_active);
+ result = DEVPTS_SB(sb);
+
+out:
+ path_put(&path);
+ return result;
+}
+
+void devpts_release(struct pts_fs_info *fsi)
+{
+ deactivate_super(fsi->sb);
}
#define PARSE_MOUNT 0
@@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
/*
* parse_mount_options():
* Set @opts to mount options specified in @data. If an option is not
- * specified in @data, set it to its default value. The exception is
- * 'newinstance' option which can only be set/cleared on a mount (i.e.
- * cannot be changed during remount).
+ * specified in @data, set it to its default value.
*
* Note: @data may be NULL (in which case all options are set to default).
*/
@@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
opts->max = NR_UNIX98_PTY_MAX;
- /* newinstance makes sense only on initial mount */
+ /* Only allow instances mounted from the initial mount
+ * namespace to tap the reserve pool of ptys.
+ */
if (op == PARSE_MOUNT)
- opts->newinstance = 0;
+ opts->reserve =
+ (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns);
while ((p = strsep(&data, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
@@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->mode = option & S_IALLUGO;
break;
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
case Opt_ptmxmode:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->ptmxmode = option & S_IALLUGO;
break;
case Opt_newinstance:
- /* newinstance makes sense only on initial mount */
- if (op == PARSE_MOUNT)
- opts->newinstance = 1;
break;
case Opt_max:
if (match_int(&args[0], &option) ||
@@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->max = option;
break;
-#endif
default:
pr_err("called with bogus options\n");
return -EINVAL;
@@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return 0;
}
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
static int mknod_ptmx(struct super_block *sb)
{
int mode;
@@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
}
}
-#else
-static inline void update_ptmx_mode(struct pts_fs_info *fsi)
-{
- return;
-}
-#endif
static int devpts_remount(struct super_block *sb, int *flags, char *data)
{
@@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",gid=%u",
from_kgid_munged(&init_user_ns, opts->gid));
seq_printf(seq, ",mode=%03o", opts->mode);
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
if (opts->max < NR_UNIX98_PTY_MAX)
seq_printf(seq, ",max=%d", opts->max);
-#endif
return 0;
}
@@ -410,40 +426,11 @@ fail:
return -ENOMEM;
}
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
-static int compare_init_pts_sb(struct super_block *s, void *p)
-{
- if (devpts_mnt)
- return devpts_mnt->mnt_sb == s;
- return 0;
-}
-
/*
* devpts_mount()
*
- * If the '-o newinstance' mount option was specified, mount a new
- * (private) instance of devpts. PTYs created in this instance are
- * independent of the PTYs in other devpts instances.
- *
- * If the '-o newinstance' option was not specified, mount/remount the
- * initial kernel mount of devpts. This type of mount gives the
- * legacy, single-instance semantics.
- *
- * The 'newinstance' option is needed to support multiple namespace
- * semantics in devpts while preserving backward compatibility of the
- * current 'single-namespace' semantics. i.e all mounts of devpts
- * without the 'newinstance' mount option should bind to the initial
- * kernel mount, like mount_single().
- *
- * Mounts with 'newinstance' option create a new, private namespace.
- *
- * NOTE:
- *
- * For single-mount semantics, devpts cannot use mount_single(),
- * because mount_single()/sget() find and use the super-block from
- * the most recent mount of devpts. But that recent mount may be a
- * 'newinstance' mount and mount_single() would pick the newinstance
- * super-block instead of the initial super-block.
+ * Mount a new (private) instance of devpts. PTYs created in this
+ * instance are independent of the PTYs in other devpts instances.
*/
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
@@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
if (error)
return ERR_PTR(error);
- /* Require newinstance for all user namespace mounts to ensure
- * the mount options are not changed.
- */
- if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
- return ERR_PTR(-EINVAL);
-
- if (opts.newinstance)
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- else
- s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
- NULL);
-
+ s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
@@ -491,18 +467,6 @@ out_undo_sget:
return ERR_PTR(error);
}
-#else
-/*
- * This supports only the legacy single-instance semantics (no
- * multiple-instance semantics)
- */
-static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- return mount_single(fs_type, flags, data, devpts_fill_super);
-}
-#endif
-
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
@@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = {
.name = "devpts",
.mount = devpts_mount,
.kill_sb = devpts_kill_sb,
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
.fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
-#endif
};
/*
@@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi)
int index;
int ida_ret;
- if (!fsi)
- return -ENODEV;
-
retry:
if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&allocated_ptys_lock);
- if (pty_count >= pty_limit -
- (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
+ if (pty_count >= (pty_limit -
+ (fsi->mount_opts.reserve ? 0 : pty_reserve))) {
mutex_unlock(&allocated_ptys_lock);
return -ENOSPC;
}
@@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
mutex_unlock(&allocated_ptys_lock);
}
-/*
- * pty code needs to hold extra references in case of last /dev/tty close
- */
-struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
-{
- struct super_block *sb;
- struct pts_fs_info *fsi;
-
- sb = pts_sb_from_inode(ptmx_inode);
- if (!sb)
- return NULL;
- fsi = DEVPTS_SB(sb);
- if (!fsi)
- return NULL;
-
- atomic_inc(&sb->s_active);
- return fsi;
-}
-
-void devpts_put_ref(struct pts_fs_info *fsi)
-{
- deactivate_super(fsi->sb);
-}
-
/**
* devpts_pty_new -- create a new inode in /dev/pts/
* @ptmx_inode: inode of the master
@@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi)
struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
struct dentry *dentry;
- struct super_block *sb;
+ struct super_block *sb = fsi->sb;
struct inode *inode;
struct dentry *root;
struct pts_mount_opts *opts;
char s[12];
- if (!fsi)
- return ERR_PTR(-ENODEV);
-
- sb = fsi->sb;
root = sb->s_root;
opts = &fsi->mount_opts;
@@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry)
static int __init init_devpts_fs(void)
{
int err = register_filesystem(&devpts_fs_type);
- struct ctl_table_header *table;
-
if (!err) {
- struct vfsmount *mnt;
-
- table = register_sysctl_table(pty_root_table);
- mnt = kern_mount(&devpts_fs_type);
- if (IS_ERR(mnt)) {
- err = PTR_ERR(mnt);
- unregister_filesystem(&devpts_fs_type);
- unregister_sysctl_table(table);
- } else {
- devpts_mnt = mnt;
- }
+ register_sysctl_table(pty_root_table);
}
return err;
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 0d8eb3455b34..e5e29f8c920b 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -45,7 +45,7 @@
* ecryptfs_to_hex
* @dst: Buffer to take hex character representation of contents of
* src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string respresentation
+ * @src: Buffer to be converted to a hex string representation
* @src_size: number of bytes to convert
*/
void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
@@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
* ecryptfs_from_hex
* @dst: Buffer to take the bytes from src hex; must be at least of
* size (src_size / 2)
- * @src: Buffer to be converted from a hex string respresentation to raw value
+ * @src: Buffer to be converted from a hex string representation to raw value
* @dst_size: size of dst buffer, or number of hex characters pairs to convert
*/
void ecryptfs_from_hex(char *dst, char *src, int dst_size)
@@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem {
};
/* Add support for additional ciphers by adding elements here. The
- * cipher_code is whatever OpenPGP applicatoins use to identify the
+ * cipher_code is whatever OpenPGP applications use to identify the
* ciphers. List in order of probability. */
static struct ecryptfs_cipher_code_str_map_elem
ecryptfs_cipher_code_str_map[] = {
@@ -1410,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
*
* Common entry point for reading file metadata. From here, we could
* retrieve the header information from the header region of the file,
- * the xattr region of the file, or some other repostory that is
+ * the xattr region of the file, or some other repository that is
* stored separately from the file itself. The current implementation
* supports retrieving the metadata information from the file contents
* and from the xattr region.
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7000b96b783e..ca4e83750214 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -169,9 +169,22 @@ out:
return rc;
}
+static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+ /*
+ * Don't allow mmap on top of file systems that don't support it
+ * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
+ * allows recursive mounting, this will need to be extended.
+ */
+ if (!lower_file->f_op->mmap)
+ return -ENODEV;
+ return generic_file_mmap(file, vma);
+}
+
/**
* ecryptfs_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
* @file: Structure to return filled in
*
* Opens the file specified by inode.
@@ -240,7 +253,7 @@ out:
/**
* ecryptfs_dir_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
* @file: Structure to return filled in
*
* Opens the file specified by inode.
@@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ecryptfs_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1698132d0e57..612004495141 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void)
struct ecryptfs_cache_info *info;
info = &ecryptfs_cache_infos[i];
- if (*(info->cache))
- kmem_cache_destroy(*(info->cache));
+ kmem_cache_destroy(*(info->cache));
}
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 989a2cef6b76..fe7e83a45eff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
goto out_free;
}
inode->i_state |= I_WB_SWITCH;
+ __iget(inode);
spin_unlock(&inode->i_lock);
- ihold(inode);
isw->inode = inode;
atomic_inc(&isw_nr_in_flight);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 3078b679fcd1..c8c4f79c7ce1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -887,6 +887,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
put_page(results[i]);
}
+ wake_up_bit(&cookie->flags, 0);
+
_leave("");
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ccd4971cc6c1..cca7b048c07b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
struct dentry *newent;
bool outarg_valid = true;
+ fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
+ fuse_unlock_inode(dir);
if (err == -ENOENT) {
outarg_valid = false;
err = 0;
@@ -478,7 +480,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct fuse_conn *fc = get_fuse_conn(dir);
struct dentry *res = NULL;
- if (d_unhashed(entry)) {
+ if (d_in_lookup(entry)) {
res = fuse_lookup(dir, entry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
@@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
+ fuse_lock_inode(inode);
fuse_request_send(fc, req);
+ fuse_unlock_inode(inode);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eddbe02c4028..929c383432b0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -110,6 +110,9 @@ struct fuse_inode {
/** Miscellaneous bits describing inode state */
unsigned long state;
+
+ /** Lock for serializing lookup and readdir for back compatibility*/
+ struct mutex mutex;
};
/** FUSE inode state bits */
@@ -540,6 +543,9 @@ struct fuse_conn {
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
+ /** allow parallel lookups and readdir (default is serialized) */
+ unsigned parallel_dirops:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
void fuse_set_initialized(struct fuse_conn *fc);
+void fuse_unlock_inode(struct inode *inode);
+void fuse_lock_inode(struct inode *inode);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1ce67668a8e1..9961d8432ce3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
+ mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode)
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files));
BUG_ON(!list_empty(&fi->queued_writes));
+ mutex_destroy(&fi->mutex);
kfree(fi->forget);
call_rcu(&inode->i_rcu, fuse_i_callback);
}
@@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
return 0;
}
+void fuse_lock_inode(struct inode *inode)
+{
+ if (!get_fuse_conn(inode)->parallel_dirops)
+ mutex_lock(&get_fuse_inode(inode)->mutex);
+}
+
+void fuse_unlock_inode(struct inode *inode)
+{
+ if (!get_fuse_conn(inode)->parallel_dirops)
+ mutex_unlock(&get_fuse_inode(inode)->mutex);
+}
+
static void fuse_umount_begin(struct super_block *sb)
{
fuse_abort_conn(get_fuse_conn_super(sb));
@@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
+ if (arg->flags & FUSE_PARALLEL_DIROPS)
+ fc->parallel_dirops = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
@@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
- FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
+ FUSE_PARALLEL_DIROPS;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 21dc784f66c2..9bad79fede37 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1189,7 +1189,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct dentry *d;
bool excl = !!(flags & O_EXCL);
- if (!d_unhashed(dentry))
+ if (!d_in_lookup(dentry))
goto skip_lookup;
d = __gfs2_lookup(dir, dentry, file, opened);
diff --git a/fs/internal.h b/fs/internal.h
index b71deeecea17..f57ced528cde 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool);
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
extern int d_set_mounted(struct dentry *dentry);
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
+extern struct dentry *d_alloc_cursor(struct dentry *);
/*
* read_write.c
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b31852f76f46..e3ca4b4cac84 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
BUG_ON(size & (size-1)); /* Must be a power of 2 */
- flags |= __GFP_REPEAT;
- if (size == PAGE_SIZE)
- ptr = (void *)__get_free_pages(flags, 0);
- else if (size > PAGE_SIZE) {
- int order = get_order(size);
-
- if (order < 3)
- ptr = (void *)__get_free_pages(flags, order);
- else
- ptr = vmalloc(size);
- } else
+ if (size < PAGE_SIZE)
ptr = kmem_cache_alloc(get_slab(size), flags);
+ else
+ ptr = (void *)__get_free_pages(flags, get_order(size));
/* Check alignment; SLUB has gotten this wrong in the past,
* and this can lead to user data corruption! */
@@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
void jbd2_free(void *ptr, size_t size)
{
- if (size == PAGE_SIZE) {
- free_pages((unsigned long)ptr, 0);
- return;
- }
- if (size > PAGE_SIZE) {
- int order = get_order(size);
-
- if (order < 3)
- free_pages((unsigned long)ptr, order);
- else
- vfree(ptr);
- return;
- }
- kmem_cache_free(get_slab(size), ptr);
+ if (size < PAGE_SIZE)
+ kmem_cache_free(get_slab(size), ptr);
+ else
+ free_pages((unsigned long)ptr, get_order(size));
};
/*
diff --git a/fs/libfs.c b/fs/libfs.c
index 3db2721144c2..74dc8b9e7f53 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup);
int dcache_dir_open(struct inode *inode, struct file *file)
{
- static struct qstr cursor_name = QSTR_INIT(".", 1);
-
- file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
+ file->private_data = d_alloc_cursor(file->f_path.dentry);
return file->private_data ? 0 : -ENOMEM;
}
@@ -86,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file)
}
EXPORT_SYMBOL(dcache_dir_close);
+/* parent is locked at least shared */
+static struct dentry *next_positive(struct dentry *parent,
+ struct list_head *from,
+ int count)
+{
+ unsigned *seq = &parent->d_inode->i_dir_seq, n;
+ struct dentry *res;
+ struct list_head *p;
+ bool skipped;
+ int i;
+
+retry:
+ i = count;
+ skipped = false;
+ n = smp_load_acquire(seq) & ~1;
+ res = NULL;
+ rcu_read_lock();
+ for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+ struct dentry *d = list_entry(p, struct dentry, d_child);
+ if (!simple_positive(d)) {
+ skipped = true;
+ } else if (!--i) {
+ res = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (skipped) {
+ smp_rmb();
+ if (unlikely(*seq != n))
+ goto retry;
+ }
+ return res;
+}
+
+static void move_cursor(struct dentry *cursor, struct list_head *after)
+{
+ struct dentry *parent = cursor->d_parent;
+ unsigned n, *seq = &parent->d_inode->i_dir_seq;
+ spin_lock(&parent->d_lock);
+ for (;;) {
+ n = *seq;
+ if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
+ break;
+ cpu_relax();
+ }
+ __list_del(cursor->d_child.prev, cursor->d_child.next);
+ if (after)
+ list_add(&cursor->d_child, after);
+ else
+ list_add_tail(&cursor->d_child, &parent->d_subdirs);
+ smp_store_release(seq, n + 2);
+ spin_unlock(&parent->d_lock);
+}
+
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
@@ -101,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
if (file->f_pos >= 2) {
- struct list_head *p;
struct dentry *cursor = file->private_data;
+ struct dentry *to;
loff_t n = file->f_pos - 2;
- spin_lock(&dentry->d_lock);
- /* d_lock not required for cursor */
- list_del(&cursor->d_child);
- p = dentry->d_subdirs.next;
- while (n && p != &dentry->d_subdirs) {
- struct dentry *next;
- next = list_entry(p, struct dentry, d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (simple_positive(next))
- n--;
- spin_unlock(&next->d_lock);
- p = p->next;
- }
- list_add_tail(&cursor->d_child, p);
- spin_unlock(&dentry->d_lock);
+ inode_lock_shared(dentry->d_inode);
+ to = next_positive(dentry, &dentry->d_subdirs, n);
+ move_cursor(cursor, to ? &to->d_child : NULL);
+ inode_unlock_shared(dentry->d_inode);
}
}
return offset;
@@ -142,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data;
- struct list_head *p, *q = &cursor->d_child;
+ struct list_head *p = &cursor->d_child;
+ struct dentry *next;
+ bool moved = false;
if (!dir_emit_dots(file, ctx))
return 0;
- spin_lock(&dentry->d_lock);
- if (ctx->pos == 2)
- list_move(q, &dentry->d_subdirs);
- for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
- struct dentry *next = list_entry(p, struct dentry, d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (!simple_positive(next)) {
- spin_unlock(&next->d_lock);
- continue;
- }
-
- spin_unlock(&next->d_lock);
- spin_unlock(&dentry->d_lock);
+ if (ctx->pos == 2)
+ p = &dentry->d_subdirs;
+ while ((next = next_positive(dentry, p, 1)) != NULL) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next))))
- return 0;
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- /* next is still alive */
- list_move(q, p);
- spin_unlock(&next->d_lock);
- p = q;
+ break;
+ moved = true;
+ p = &next->d_child;
ctx->pos++;
}
- spin_unlock(&dentry->d_lock);
+ if (moved)
+ move_cursor(cursor, p);
return 0;
}
EXPORT_SYMBOL(dcache_readdir);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 154a107cd376..fc4084ef4736 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = {
};
#endif
-static void lockd_svc_exit_thread(void)
+static void lockd_unregister_notifiers(void)
{
unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
#endif
+}
+
+static void lockd_svc_exit_thread(void)
+{
+ lockd_unregister_notifiers();
svc_exit_thread(nlmsvc_rqst);
}
@@ -462,7 +467,7 @@ int lockd_up(struct net *net)
* Note: svc_serv structures have an initial use count of 1,
* so we exit through here on both success and failure.
*/
-err_net:
+err_put:
svc_destroy(serv);
err_create:
mutex_unlock(&nlmsvc_mutex);
@@ -470,7 +475,9 @@ err_create:
err_start:
lockd_down_net(serv, net);
- goto err_net;
+err_net:
+ lockd_unregister_notifiers();
+ goto err_put;
}
EXPORT_SYMBOL_GPL(lockd_up);
diff --git a/fs/locks.c b/fs/locks.c
index 7c5f91be9b65..ee1b15f6fc13 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
{
struct file_lock *fl, *my_fl = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
diff --git a/fs/namei.c b/fs/namei.c
index 4c4f95ac8aa5..70580ab1445c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path)
}
}
+static int path_parent_directory(struct path *path)
+{
+ struct dentry *old = path->dentry;
+ /* rare case of legitimate dget_parent()... */
+ path->dentry = dget_parent(path->dentry);
+ dput(old);
+ if (unlikely(!path_connected(path)))
+ return -ENOENT;
+ return 0;
+}
+
static int follow_dotdot(struct nameidata *nd)
{
while(1) {
- struct dentry *old = nd->path.dentry;
-
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
- /* rare case of legitimate dget_parent()... */
- nd->path.dentry = dget_parent(nd->path.dentry);
- dput(old);
- if (unlikely(!path_connected(&nd->path)))
- return -ENOENT;
+ int ret = path_parent_directory(&nd->path);
+ if (ret)
+ return ret;
break;
}
if (!follow_up(&nd->path))
@@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name,
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
+#ifdef CONFIG_UNIX98_PTYS
+int path_pts(struct path *path)
+{
+ /* Find something mounted on "pts" in the same directory as
+ * the input path.
+ */
+ struct dentry *child, *parent;
+ struct qstr this;
+ int ret;
+
+ ret = path_parent_directory(path);
+ if (ret)
+ return ret;
+
+ parent = path->dentry;
+ this.name = "pts";
+ this.len = 3;
+ child = d_hash_and_lookup(parent, &this);
+ if (!child)
+ return -ENOENT;
+
+ path->dentry = child;
+ dput(parent);
+ follow_mount(path);
+ return 0;
+}
+#endif
+
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
@@ -2995,9 +3030,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
}
if (*opened & FILE_CREATED)
fsnotify_create(dir, dentry);
- path->dentry = dentry;
- path->mnt = nd->path.mnt;
- return 1;
+ if (unlikely(d_is_negative(dentry))) {
+ error = -ENOENT;
+ } else {
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return 1;
+ }
}
}
dput(dentry);
@@ -3166,9 +3205,7 @@ static int do_last(struct nameidata *nd,
int acc_mode = op->acc_mode;
unsigned seq;
struct inode *inode;
- struct path save_parent = { .dentry = NULL, .mnt = NULL };
struct path path;
- bool retried = false;
int error;
nd->flags &= ~LOOKUP_PARENT;
@@ -3211,7 +3248,6 @@ static int do_last(struct nameidata *nd,
return -EISDIR;
}
-retry_lookup:
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
error = mnt_want_write(nd->path.mnt);
if (!error)
@@ -3263,6 +3299,10 @@ retry_lookup:
got_write = false;
}
+ error = follow_managed(&path, nd);
+ if (unlikely(error < 0))
+ return error;
+
if (unlikely(d_is_negative(path.dentry))) {
path_to_nameidata(&path, nd);
return -ENOENT;
@@ -3278,10 +3318,6 @@ retry_lookup:
return -EEXIST;
}
- error = follow_managed(&path, nd);
- if (unlikely(error < 0))
- return error;
-
seq = 0; /* out of RCU mode, so the value doesn't matter */
inode = d_backing_inode(path.dentry);
finish_lookup:
@@ -3292,23 +3328,14 @@ finish_lookup:
if (unlikely(error))
return error;
- if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
- path_to_nameidata(&path, nd);
- } else {
- save_parent.dentry = nd->path.dentry;
- save_parent.mnt = mntget(path.mnt);
- nd->path.dentry = path.dentry;
-
- }
+ path_to_nameidata(&path, nd);
nd->inode = inode;
nd->seq = seq;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:
error = complete_walk(nd);
- if (error) {
- path_put(&save_parent);
+ if (error)
return error;
- }
audit_inode(nd->name, nd->path.dentry, 0);
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
@@ -3331,13 +3358,9 @@ finish_open_created:
goto out;
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file, current_cred());
- if (!error) {
- *opened |= FILE_OPENED;
- } else {
- if (error == -EOPENSTALE)
- goto stale_open;
+ if (error)
goto out;
- }
+ *opened |= FILE_OPENED;
opened:
error = open_check_o_direct(file);
if (!error)
@@ -3353,26 +3376,7 @@ out:
}
if (got_write)
mnt_drop_write(nd->path.mnt);
- path_put(&save_parent);
return error;
-
-stale_open:
- /* If no saved parent or already retried then can't retry */
- if (!save_parent.dentry || retried)
- goto out;
-
- BUG_ON(save_parent.dentry != dir);
- path_put(&nd->path);
- nd->path = save_parent;
- nd->inode = dir->d_inode;
- save_parent.mnt = NULL;
- save_parent.dentry = NULL;
- if (got_write) {
- mnt_drop_write(nd->path.mnt);
- got_write = false;
- }
- retried = true;
- goto retry_lookup;
}
static int do_tmpfile(struct nameidata *nd, unsigned flags,
diff --git a/fs/namespace.c b/fs/namespace.c
index 4fb1691b4355..419f746d851d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry)
goto out_unlock;
lock_mount_hash();
+ event++;
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -2409,8 +2410,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
if (type->fs_flags & FS_USERNS_VISIBLE) {
- if (!fs_fully_visible(type, &mnt_flags))
+ if (!fs_fully_visible(type, &mnt_flags)) {
+ put_filesystem(type);
return -EPERM;
+ }
}
}
@@ -3245,6 +3248,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
+ /* Don't miss readonly hidden in the superblock flags */
+ if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+ mnt_flags |= MNT_LOCK_READONLY;
+
/* Verify the mount flags are equal to or more permissive
* than the proposed new mount.
*/
@@ -3271,7 +3278,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
/* Only worry about locked mounts */
- if (!(mnt_flags & MNT_LOCKED))
+ if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
/* Is the directory permanetly empty? */
if (!is_empty_dir_inode(inode))
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0cbae2..19d93d0cd400 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
+ struct inode *inode;
struct nfs_inode *nfsi;
if (d_really_is_negative(dentry))
return 0;
- nfsi = NFS_I(d_inode(dentry));
+ inode = d_inode(dentry);
+ if (is_bad_inode(inode) || NFS_STALE(inode))
+ return 0;
+
+ nfsi = NFS_I(inode);
if (entry->fattr->fileid == nfsi->fileid)
return 1;
if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
@@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations);
struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
struct dentry *res;
- struct dentry *parent;
struct inode *inode = NULL;
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
@@ -1393,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
if (IS_ERR(label))
goto out;
- parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
@@ -1482,11 +1485,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode, int *opened)
{
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
unsigned int lookup_flags = 0;
+ bool switched = false;
int err;
/* Expect a negative dentry */
@@ -1501,7 +1506,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
/* NFS only supports OPEN on regular files */
if ((open_flags & O_DIRECTORY)) {
- if (!d_unhashed(dentry)) {
+ if (!d_in_lookup(dentry)) {
/*
* Hashed negative dentry with O_DIRECTORY: dentry was
* revalidated and is fine, no need to perform lookup
@@ -1525,6 +1530,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
attr.ia_size = 0;
}
+ if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
+ d_drop(dentry);
+ switched = true;
+ dentry = d_alloc_parallel(dentry->d_parent,
+ &dentry->d_name, &wq);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ if (unlikely(!d_in_lookup(dentry)))
+ return finish_no_open(file, dentry);
+ }
+
ctx = create_nfs_open_context(dentry, open_flags);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
@@ -1536,9 +1552,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
+ d_drop(dentry);
switch (err) {
case -ENOENT:
- d_drop(dentry);
d_add(dentry, NULL);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
@@ -1560,14 +1576,23 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
out:
+ if (unlikely(switched)) {
+ d_lookup_done(dentry);
+ dput(dentry);
+ }
return err;
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
- err = PTR_ERR(res);
+ if (switched) {
+ d_lookup_done(dentry);
+ if (!res)
+ res = dentry;
+ else
+ dput(dentry);
+ }
if (IS_ERR(res))
- goto out;
-
+ return PTR_ERR(res);
return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4dee6a..c7326c2af2c3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
result = wait_for_completion_killable(&dreq->completion);
+ if (!result) {
+ result = dreq->count;
+ WARN_ON_ONCE(dreq->count < 0);
+ }
if (!result)
result = dreq->error;
- if (!result)
- result = dreq->count;
out:
return (ssize_t) result;
@@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
if (dreq->iocb) {
long res = (long) dreq->error;
- if (!res)
+ if (dreq->count != 0) {
res = (long) dreq->count;
+ WARN_ON_ONCE(dreq->count < 0);
+ }
dreq->iocb->ki_complete(dreq->iocb, res, 0);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d6869e3b..dda689d7a8a7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
struct nfs_fattr *fattr = desc->fattr;
set_nfs_fileid(inode, fattr->fileid);
+ inode->i_mode = fattr->mode;
nfs_copy_fh(NFS_FH(inode), desc->fh);
return 0;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..ff416d0e24bc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
call_close |= is_wronly;
else if (is_wronly)
calldata->arg.fmode |= FMODE_WRITE;
+ if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
+ call_close |= is_rdwr;
} else if (is_rdwr)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
- if (calldata->arg.fmode == 0)
- call_close |= is_rdwr;
-
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -7924,8 +7923,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
break;
}
lo = NFS_I(inode)->layout;
- if (lo && nfs4_stateid_match(&lgp->args.stateid,
- &lo->plh_stateid)) {
+ if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+ nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
LIST_HEAD(head);
/*
@@ -7936,10 +7935,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
+ status = -EAGAIN;
+ goto out;
} else
spin_unlock(&inode->i_lock);
- status = -EAGAIN;
- goto out;
}
status = nfs4_handle_exception(server, status, exception);
@@ -8036,7 +8035,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
.flags = RPC_TASK_ASYNC,
};
struct pnfs_layout_segment *lseg = NULL;
- struct nfs4_exception exception = { .timeout = *timeout };
+ struct nfs4_exception exception = {
+ .inode = inode,
+ .timeout = *timeout,
+ };
int status = 0;
dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9679f4749364..834b875900d6 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1488,9 +1488,9 @@ restart:
}
spin_unlock(&state->state_lock);
}
- nfs4_put_open_state(state);
clear_bit(NFS_STATE_RECLAIM_NOGRACE,
&state->flags);
+ nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
goto restart;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..0fbe734cc38c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
list_del_init(&lseg->pls_list);
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
atomic_dec(&lo->plh_refcount);
- if (list_empty(&lo->plh_segs))
+ if (list_empty(&lo->plh_segs)) {
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
@@ -1290,6 +1292,7 @@ alloc_init_layout_hdr(struct inode *ino,
INIT_LIST_HEAD(&lo->plh_bulk_destroy);
lo->plh_inode = ino;
lo->plh_lc_cred = get_rpccred(ctx->cred);
+ lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
return lo;
}
@@ -1297,6 +1300,8 @@ static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
struct nfs_open_context *ctx,
gfp_t gfp_flags)
+ __releases(&ino->i_lock)
+ __acquires(&ino->i_lock)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_hdr *new = NULL;
@@ -1565,8 +1570,7 @@ lookup_again:
* stateid, or it has been invalidated, then we must use the open
* stateid.
*/
- if (lo->plh_stateid.seqid == 0 ||
- test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
+ if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
/*
* The first layoutget for the file. Need to serialize per
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 0dfc476da3e1..b38e3c0dc790 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
}
/* Helper function for pnfs_generic_commit_pagelist to catch an empty
- * page list. This can happen when two commits race. */
+ * page list. This can happen when two commits race.
+ *
+ * This must be called instead of nfs_init_commit - call one or the other, but
+ * not both!
+ */
static bool
pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
struct nfs_commit_data *data,
@@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
if (list_empty(pages)) {
if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
wake_up_atomic_t(&cinfo->mds->rpcs_out);
- nfs_commitdata_release(data);
+ /* don't call nfs_commitdata_release - it tries to put
+ * the open_context which is not acquired until nfs_init_commit
+ * which has not been called on @data */
+ WARN_ON_ONCE(data->context);
+ nfs_commit_free(data);
return true;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6776d7a7839e..572e5b3b06f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)
nfs_list_remove_request(new);
nfs_readpage_release(new);
error = desc->pgio->pg_error;
- goto out_unlock;
+ goto out;
}
return 0;
out_error:
error = PTR_ERR(new);
-out_unlock:
unlock_page(page);
+out:
return error;
}
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e55b5242614d..31f3df193bdb 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -290,7 +290,7 @@ out_free_buf:
return error;
}
-#define NFSD_MDS_PR_KEY 0x0100000000000000
+#define NFSD_MDS_PR_KEY 0x0100000000000000ULL
/*
* We use the client ID as a unique key for the reservations.
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1580ea6fd64d..d08cd88155c7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
if (error)
- goto out_drop_write;
+ goto out_drop_lock;
+
+ fh_unlock(fh);
fh_drop_write(fh);
@@ -131,7 +130,8 @@ out:
posix_acl_release(argp->acl_access);
posix_acl_release(argp->acl_default);
return nfserr;
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 01df4cd7c753..0c890347cde3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6adabd6049b7..71292a0d6f09 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
- return nfserr_attrnotsupp;
-
if (S_ISDIR(inode->i_mode))
flags = NFS4_ACL_DIR;
@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_error < 0)
goto out_nfserr;
- host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
+ fh_lock(fhp);
+
+ host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
if (host_error < 0)
- goto out_release;
+ goto out_drop_lock;
if (S_ISDIR(inode->i_mode)) {
- host_error = inode->i_op->set_acl(inode, dpacl,
- ACL_TYPE_DEFAULT);
+ host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
}
-out_release:
+out_drop_lock:
+ fh_unlock(fhp);
+
posix_acl_release(pacl);
posix_acl_release(dpacl);
out_nfserr:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7389cb1d7409..04c68d900324 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
}
}
-static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
-{
- struct rpc_xprt *xprt;
-
- if (args->protocol != XPRT_TRANSPORT_BC_TCP)
- return rpc_create(args);
-
- xprt = args->bc_xprt->xpt_bc_xprt;
- if (xprt) {
- xprt_get(xprt);
- return rpc_create_xprt(args, xprt);
- }
-
- return rpc_create(args);
-}
-
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
int maxtime = max_cb_time(clp->net);
@@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
- client = create_backchannel_client(&args);
+ client = rpc_create(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client: %ld\n",
PTR_ERR(client));
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f5f82e145018..70d0b9b33031 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3480,12 +3480,17 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
}
static struct nfs4_ol_stateid *
-init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
- struct nfsd4_open *open)
+init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
{
struct nfs4_openowner *oo = open->op_openowner;
struct nfs4_ol_stateid *retstp = NULL;
+ struct nfs4_ol_stateid *stp;
+
+ stp = open->op_stp;
+ /* We are moving these outside of the spinlocks to avoid the warnings */
+ mutex_init(&stp->st_mutex);
+ mutex_lock(&stp->st_mutex);
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
@@ -3493,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
+
+ open->op_stp = NULL;
atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
@@ -3502,14 +3509,19 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
- init_rwsem(&stp->st_rwsem);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
- return retstp;
+ if (retstp) {
+ mutex_lock(&retstp->st_mutex);
+ /* To keep mutex tracking happy */
+ mutex_unlock(&stp->st_mutex);
+ stp = retstp;
+ }
+ return stp;
}
/*
@@ -4305,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_ol_stateid *swapstp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
@@ -4335,32 +4346,28 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
- down_read(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
- stp = open->op_stp;
- open->op_stp = NULL;
- swapstp = init_open_stateid(stp, fp, open);
- if (swapstp) {
- nfs4_put_stid(&stp->st_stid);
- stp = swapstp;
- down_read(&stp->st_rwsem);
+ /* stp is returned locked. */
+ stp = init_open_stateid(fp, open);
+ /* See if we lost the race to some other thread */
+ if (stp->st_access_bmap != 0) {
status = nfs4_upgrade_open(rqstp, fp, current_fh,
stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
goto upgrade_out;
}
- down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
release_open_stateid(stp);
goto out;
}
@@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
}
upgrade_out:
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
- down_write(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid);
if (status != nfs_ok)
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
return status;
}
@@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
if (oo->oo_flags & NFS4_OO_CONFIRMED) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto put_stateid;
}
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
@@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
- init_rwsem(&stp->st_rwsem);
+ mutex_init(&stp->st_mutex);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
- up_write(&open_stp->st_rwsem);
+ mutex_unlock(&open_stp->st_mutex);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
if (status == nfs_ok)
- down_write(&lock_stp->st_rwsem);
+ mutex_lock(&lock_stp->st_mutex);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5704,7 +5711,7 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
- up_write(&lock_stp->st_rwsem);
+ mutex_unlock(&lock_stp->st_mutex);
/*
* If this is a new, never-before-used stateid, and we are
@@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput:
fput(filp);
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 986e51e5ceac..64053eadeb81 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
- struct rw_semaphore st_rwsem;
+ struct mutex st_mutex;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 809bd2de7ad0..e9fd241b9a0a 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
return 0;
bytes = le16_to_cpu(sbp->s_bytes);
- if (bytes > BLOCK_SIZE)
+ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
return 0;
crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
sumoff);
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index e27e6527912b..4342c7ee7d20 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,7 +1,5 @@
ccflags-y := -Ifs/ocfs2
-ccflags-y += -DCATCH_BH_JBD_RACES
-
obj-$(CONFIG_OCFS2_FS) += \
ocfs2.o \
ocfs2_stackglue.o
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index fe50ded1b4ce..498641eed2db 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
lock_buffer(bh);
if (buffer_jbd(bh)) {
+#ifdef CATCH_BH_JBD_RACES
mlog(ML_ERROR,
"block %llu had the JBD bit set "
"while I was in lock_buffer!",
(unsigned long long)bh->b_blocknr);
BUG();
+#else
+ unlock_buffer(bh);
+ continue;
+#endif
}
clear_buffer_uptodate(bh);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 22f0253a3567..5c9d2d80ff70 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -405,12 +405,21 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
} else {
const struct cred *old_cred;
+ struct cred *override_cred;
old_cred = ovl_override_creds(dentry->d_sb);
- err = ovl_create_over_whiteout(dentry, inode, &stat, link,
- hardlink);
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (override_cred) {
+ override_cred->fsuid = old_cred->fsuid;
+ override_cred->fsgid = old_cred->fsgid;
+ put_cred(override_creds(override_cred));
+ put_cred(override_cred);
+ err = ovl_create_over_whiteout(dentry, inode, &stat,
+ link, hardlink);
+ }
revert_creds(old_cred);
}
@@ -496,6 +505,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
struct dentry *upper;
struct dentry *opaquedir = NULL;
int err;
+ int flags = 0;
if (WARN_ON(!workdir))
return -EROFS;
@@ -525,46 +535,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
if (err)
goto out_dput;
- whiteout = ovl_whiteout(workdir, dentry);
- err = PTR_ERR(whiteout);
- if (IS_ERR(whiteout))
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
goto out_unlock;
- upper = ovl_dentry_upper(dentry);
- if (!upper) {
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto kill_whiteout;
-
- err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
- dput(upper);
- if (err)
- goto kill_whiteout;
- } else {
- int flags = 0;
+ err = -ESTALE;
+ if ((opaquedir && upper != opaquedir) ||
+ (!opaquedir && ovl_dentry_upper(dentry) &&
+ upper != ovl_dentry_upper(dentry))) {
+ goto out_dput_upper;
+ }
- if (opaquedir)
- upper = opaquedir;
- err = -ESTALE;
- if (upper->d_parent != upperdir)
- goto kill_whiteout;
+ whiteout = ovl_whiteout(workdir, dentry);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto out_dput_upper;
- if (is_dir)
- flags |= RENAME_EXCHANGE;
+ if (d_is_dir(upper))
+ flags = RENAME_EXCHANGE;
- err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
- if (err)
- goto kill_whiteout;
+ err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+ if (err)
+ goto kill_whiteout;
+ if (flags)
+ ovl_cleanup(wdir, upper);
- if (is_dir)
- ovl_cleanup(wdir, upper);
- }
ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
d_drop(dentry);
dput(whiteout);
+out_dput_upper:
+ dput(upper);
out_unlock:
unlock_rename(workdir, upperdir);
out_dput:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 0ed7c4012437..d1cdc60dd68f 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -59,16 +59,40 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
goto out;
+ if (attr->ia_valid & ATTR_SIZE) {
+ struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+ err = -ETXTBSY;
+ if (atomic_read(&realinode->i_writecount) < 0)
+ goto out_drop_write;
+ }
+
err = ovl_copy_up(dentry);
if (!err) {
+ struct inode *winode = NULL;
+
upperdentry = ovl_dentry_upper(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ winode = d_inode(upperdentry);
+ err = get_write_access(winode);
+ if (err)
+ goto out_drop_write;
+ }
+
+ if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attr->ia_valid &= ~ATTR_MODE;
+
inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+
+ if (winode)
+ put_write_access(winode);
}
+out_drop_write:
ovl_drop_write(dentry);
out:
return err;
@@ -121,16 +145,18 @@ int ovl_permission(struct inode *inode, int mask)
err = vfs_getattr(&realpath, &stat);
if (err)
- return err;
+ goto out_dput;
+ err = -ESTALE;
if ((stat.mode ^ inode->i_mode) & S_IFMT)
- return -ESTALE;
+ goto out_dput;
inode->i_mode = stat.mode;
inode->i_uid = stat.uid;
inode->i_gid = stat.gid;
- return generic_permission(inode, mask);
+ err = generic_permission(inode, mask);
+ goto out_dput;
}
/* Careful in RCU walk mode */
@@ -238,41 +264,27 @@ out:
return err;
}
-static bool ovl_need_xattr_filter(struct dentry *dentry,
- enum ovl_path_type type)
-{
- if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
- return S_ISDIR(dentry->d_inode->i_mode);
- else
- return false;
-}
-
ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
const char *name, void *value, size_t size)
{
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+ struct dentry *realdentry = ovl_dentry_real(dentry);
- if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(name))
return -ENODATA;
- return vfs_getxattr(realpath.dentry, name, value, size);
+ return vfs_getxattr(realdentry, name, value, size);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+ struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
int off;
- res = vfs_listxattr(realpath.dentry, list, size);
+ res = vfs_listxattr(realdentry, list, size);
if (res <= 0 || size == 0)
return res;
- if (!ovl_need_xattr_filter(dentry, type))
- return res;
-
/* filter out private xattrs */
for (off = 0; off < res;) {
char *s = list + off;
@@ -302,7 +314,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name)
goto out;
err = -ENODATA;
- if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(name))
goto out_drop_write;
if (!OVL_TYPE_UPPER(type)) {
@@ -401,12 +413,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
if (!inode)
return NULL;
- mode &= S_IFMT;
-
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_flags |= S_NOATIME | S_NOCMTIME;
+ mode &= S_IFMT;
switch (mode) {
case S_IFDIR:
inode->i_private = oe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4bd9b5ba8f42..cfbca53590d0 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -187,6 +187,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
to->i_gid = from->i_gid;
+ to->i_mode = from->i_mode;
}
/* dir.c */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ce02f46029da..9a7693d5f8ff 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err < 0)
goto out_put_workdir;
- if (!err) {
- pr_err("overlayfs: upper fs needs to support d_type.\n");
- err = -EINVAL;
- goto out_put_workdir;
- }
+ /*
+ * We allowed this configuration and don't want to
+ * break users over kernel upgrade. So warn instead
+ * of erroring out.
+ */
+ if (!err)
+ pr_warn("overlayfs: upper fs needs to support d_type.\n");
}
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8a4a266beff3..edc452c2a563 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
return error;
}
-static int
-posix_acl_xattr_set(const struct xattr_handler *handler,
- struct dentry *unused, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+int
+set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
{
- struct posix_acl *acl = NULL;
- int ret;
-
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
if (!inode->i_op->set_acl)
return -EOPNOTSUPP;
- if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
- return value ? -EACCES : 0;
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
if (!inode_owner_or_capable(inode))
return -EPERM;
+ if (acl) {
+ int ret = posix_acl_valid(acl);
+ if (ret)
+ return ret;
+ }
+ return inode->i_op->set_acl(inode, acl, type);
+}
+EXPORT_SYMBOL(set_posix_acl);
+
+static int
+posix_acl_xattr_set(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct posix_acl *acl = NULL;
+ int ret;
+
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto out;
- }
}
-
- ret = inode->i_op->set_acl(inode, acl, handler->flags);
-out:
+ ret = set_posix_acl(inode, handler->flags, acl);
posix_acl_release(acl);
return ret;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 55bc7d6c8aac..06702783bf40 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
if (IS_ERR(sb))
return ERR_CAST(sb);
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+
if (!proc_parse_options(options, ns)) {
deactivate_locked_super(sb);
return ERR_PTR(-EINVAL);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b8f2d1e8c645..c72c16c5a60f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
unsigned long safe_mask = 0;
unsigned int commit_max_age = (unsigned int)-1;
struct reiserfs_journal *journal = SB_JOURNAL(s);
- char *new_opts = kstrdup(arg, GFP_KERNEL);
+ char *new_opts;
int err;
char *qf_names[REISERFS_MAXQUOTAS];
unsigned int qfmt = 0;
@@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
int i;
#endif
+ new_opts = kstrdup(arg, GFP_KERNEL);
+ if (arg && !new_opts)
+ return -ENOMEM;
+
sync_filesystem(s);
reiserfs_write_lock(s);
@@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
}
out_ok_unlocked:
- replace_mount_options(s, new_opts);
+ if (new_opts)
+ replace_mount_options(s, new_opts);
return 0;
out_err_unlock:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 08316972ff93..7bbf420d1289 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
#include "ubifs.h"
#include <linux/mount.h>
#include <linux/slab.h>
+#include <linux/migrate.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
return ret;
}
+#ifdef CONFIG_MIGRATION
+static int ubifs_migrate_page(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ int rc;
+
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ SetPagePrivate(newpage);
+ }
+
+ migrate_page_copy(newpage, page);
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
/*
@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
.write_end = ubifs_write_end,
.invalidatepage = ubifs_invalidatepage,
.set_page_dirty = ubifs_set_page_dirty,
+#ifdef CONFIG_MIGRATION
+ .migratepage = ubifs_migrate_page,
+#endif
.releasepage = ubifs_releasepage,
};
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 5f861ed287c3..888c364b2fe9 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
map = &UDF_SB(sb)->s_partmaps[partition];
/* map to sparable/physical partition desc */
phyblock = udf_get_pblock(sb, eloc.logicalBlockNum,
- map->s_partition_num, ext_offset + offset);
+ map->s_type_specific.s_metadata.s_phys_partition_ref,
+ ext_offset + offset);
}
brelse(epos.bh);
@@ -317,14 +318,18 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
mdata = &map->s_type_specific.s_metadata;
inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe;
- /* We shouldn't mount such media... */
- BUG_ON(!inode);
+ if (!inode)
+ return 0xFFFFFFFF;
+
retblk = udf_try_read_meta(inode, block, partition, offset);
if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
- mdata->s_mirror_file_loc, map->s_partition_num);
+ mdata->s_mirror_file_loc,
+ mdata->s_phys_partition_ref);
+ if (IS_ERR(mdata->s_mirror_fe))
+ mdata->s_mirror_fe = NULL;
mdata->s_flags |= MF_MIRROR_FE_LOADED;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5e2c8c814e1b..4942549e7dc8 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -951,13 +951,13 @@ out2:
}
struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
- u32 meta_file_loc, u32 partition_num)
+ u32 meta_file_loc, u32 partition_ref)
{
struct kernel_lb_addr addr;
struct inode *metadata_fe;
addr.logicalBlockNum = meta_file_loc;
- addr.partitionReferenceNum = partition_num;
+ addr.partitionReferenceNum = partition_ref;
metadata_fe = udf_iget_special(sb, &addr);
@@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
return metadata_fe;
}
-static int udf_load_metadata_files(struct super_block *sb, int partition)
+static int udf_load_metadata_files(struct super_block *sb, int partition,
+ int type1_index)
{
struct udf_sb_info *sbi = UDF_SB(sb);
struct udf_part_map *map;
@@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
map = &sbi->s_partmaps[partition];
mdata = &map->s_type_specific.s_metadata;
+ mdata->s_phys_partition_ref = type1_index;
/* metadata address */
udf_debug("Metadata file location: block = %d part = %d\n",
- mdata->s_meta_file_loc, map->s_partition_num);
+ mdata->s_meta_file_loc, mdata->s_phys_partition_ref);
fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc,
- map->s_partition_num);
+ mdata->s_phys_partition_ref);
if (IS_ERR(fe)) {
/* mirror file entry */
udf_debug("Mirror metadata file location: block = %d part = %d\n",
- mdata->s_mirror_file_loc, map->s_partition_num);
+ mdata->s_mirror_file_loc, mdata->s_phys_partition_ref);
fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc,
- map->s_partition_num);
+ mdata->s_phys_partition_ref);
if (IS_ERR(fe)) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
@@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
*/
if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) {
addr.logicalBlockNum = mdata->s_bitmap_file_loc;
- addr.partitionReferenceNum = map->s_partition_num;
+ addr.partitionReferenceNum = mdata->s_phys_partition_ref;
udf_debug("Bitmap file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
@@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
p = (struct partitionDesc *)bh->b_data;
partitionNumber = le16_to_cpu(p->partitionNumber);
- /* First scan for TYPE1, SPARABLE and METADATA partitions */
+ /* First scan for TYPE1 and SPARABLE partitions */
for (i = 0; i < sbi->s_partitions; i++) {
map = &sbi->s_partmaps[i];
udf_debug("Searching map: (%d == %d)\n",
@@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
goto out_bh;
if (map->s_partition_type == UDF_METADATA_MAP25) {
- ret = udf_load_metadata_files(sb, i);
+ ret = udf_load_metadata_files(sb, i, type1_idx);
if (ret < 0) {
udf_err(sb, "error loading MetaData partition map %d\n",
i);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 27b5335730c9..c13875d669c0 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -61,6 +61,11 @@ struct udf_meta_data {
__u32 s_bitmap_file_loc;
__u32 s_alloc_unit_size;
__u16 s_align_unit_size;
+ /*
+ * Partition Reference Number of the associated physical / sparable
+ * partition
+ */
+ __u16 s_phys_partition_ref;
int s_flags;
struct inode *s_metadata_fe;
struct inode *s_mirror_fe;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index dbca7375deef..63a6ff2cfc68 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1575,6 +1575,12 @@ xfs_ioc_swapext(
goto out_put_tmp_file;
}
+ if (f.file->f_op != &xfs_file_operations ||
+ tmp.file->f_op != &xfs_file_operations) {
+ error = -EINVAL;
+ goto out_put_tmp_file;
+ }
+
ip = XFS_I(file_inode(f.file));
tip = XFS_I(file_inode(tmp.file));