summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/anon_inodes.c23
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/block-group.h2
-rw-r--r--fs/btrfs/direct-io.c4
-rw-r--r--fs/btrfs/disk-io.c25
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-tree.c56
-rw-r--r--fs/btrfs/inode.c129
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/ordered-data.c14
-rw-r--r--fs/btrfs/raid56.c5
-rw-r--r--fs/btrfs/tests/extent-io-tests.c6
-rw-r--r--fs/btrfs/tree-log.c147
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/btrfs/zstd.c2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/ondemand.c4
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/efivarfs/super.c6
-rw-r--r--fs/erofs/data.c21
-rw-r--r--fs/erofs/decompressor.c12
-rw-r--r--fs/erofs/fileio.c6
-rw-r--r--fs/erofs/internal.h6
-rw-r--r--fs/erofs/zdata.c13
-rw-r--r--fs/erofs/zmap.c9
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exec.c9
-rw-r--r--fs/f2fs/file.c38
-rw-r--r--fs/f2fs/super.c30
-rw-r--r--fs/fuse/dir.c11
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/libfs.c8
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/netfs/buffered_write.c2
-rw-r--r--fs/netfs/direct_write.c8
-rw-r--r--fs/netfs/misc.c26
-rw-r--r--fs/netfs/read_pgpriv2.c5
-rw-r--r--fs/netfs/write_retry.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c120
-rw-r--r--fs/nfs/inode.c68
-rw-r--r--fs/nfs/nfs4proc.c25
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nilfs2/inode.c9
-rw-r--r--fs/notify/dnotify/dnotify.c8
-rw-r--r--fs/overlayfs/util.c4
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/proc_sysctl.c18
-rw-r--r--fs/proc/task_mmu.c16
-rw-r--r--fs/smb/client/cifs_debug.c23
-rw-r--r--fs/smb/client/cifsglob.h4
-rw-r--r--fs/smb/client/cifspdu.h6
-rw-r--r--fs/smb/client/cifsproto.h1
-rw-r--r--fs/smb/client/cifssmb.c3
-rw-r--r--fs/smb/client/connect.c73
-rw-r--r--fs/smb/client/file.c10
-rw-r--r--fs/smb/client/fs_context.c17
-rw-r--r--fs/smb/client/misc.c14
-rw-r--r--fs/smb/client/readdir.c2
-rw-r--r--fs/smb/client/reparse.c42
-rw-r--r--fs/smb/client/sess.c21
-rw-r--r--fs/smb/client/smb2inode.c3
-rw-r--r--fs/smb/client/smb2ops.c24
-rw-r--r--fs/smb/client/smb2pdu.c11
-rw-r--r--fs/smb/client/smbdirect.c544
-rw-r--r--fs/smb/client/smbdirect.h64
-rw-r--r--fs/smb/client/trace.h24
-rw-r--r--fs/smb/common/smbdirect/smbdirect.h37
-rw-r--r--fs/smb/common/smbdirect/smbdirect_pdu.h55
-rw-r--r--fs/smb/common/smbdirect/smbdirect_socket.h43
-rw-r--r--fs/smb/server/connection.h1
-rw-r--r--fs/smb/server/smb2pdu.c101
-rw-r--r--fs/smb/server/smb2pdu.h3
-rw-r--r--fs/smb/server/transport_rdma.c5
-rw-r--r--fs/smb/server/vfs.c1
-rw-r--r--fs/xfs/libxfs/xfs_group.c14
-rw-r--r--fs/xfs/xfs_extent_busy.h8
-rw-r--r--fs/xfs/xfs_rtalloc.c2
79 files changed, 1316 insertions, 808 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e51e7d88980a..1d847a939f29 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
-static struct inode *anon_inode_make_secure_inode(
- const char *name,
- const struct inode *context_inode)
+/**
+ * anon_inode_make_secure_inode - allocate an anonymous inode with security context
+ * @sb: [in] Superblock to allocate from
+ * @name: [in] Name of the class of the newfile (e.g., "secretmem")
+ * @context_inode:
+ * [in] Optional parent inode for security inheritance
+ *
+ * The function ensures proper security initialization through the LSM hook
+ * security_inode_init_security_anon().
+ *
+ * Return: Pointer to new inode on success, ERR_PTR on failure.
+ */
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+ const struct inode *context_inode)
{
struct inode *inode;
int error;
- inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ inode = alloc_anon_inode(sb);
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
}
return inode;
}
+EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
@@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
if (make_inode) {
- inode = anon_inode_make_secure_inode(name, context_inode);
+ inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
+ name, context_inode);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 74e614031274..953637115956 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -423,8 +423,8 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
struct btrfs_backref_cache *cache);
-#define LINK_LOWER (1 << 0)
-#define LINK_UPPER (1 << 1)
+#define LINK_LOWER (1U << 0)
+#define LINK_UPPER (1U << 1)
void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
struct btrfs_backref_node *lower,
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 36937eeab9b8..08548b0b5ea1 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
/* Does the block group need to be added to the free space tree? */
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
+ /* Set after we add a new block group to the free space tree. */
+ BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
/* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
/*
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index a374ce7a1813..7900cba56225 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -151,8 +151,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
}
ordered = btrfs_alloc_ordered_extent(inode, start, file_extent,
- (1 << type) |
- (1 << BTRFS_ORDERED_DIRECT));
+ (1U << type) |
+ (1U << BTRFS_ORDERED_DIRECT));
if (IS_ERR(ordered)) {
if (em) {
free_extent_map(em);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index aa58e0663a5d..321feb99c179 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2164,8 +2164,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
found = true;
root = read_tree_root_path(tree_root, path, &key);
if (IS_ERR(root)) {
- if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
- ret = PTR_ERR(root);
+ ret = PTR_ERR(root);
break;
}
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@@ -4385,8 +4384,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*
* So wait for all ongoing ordered extents to complete and then run
* delayed iputs. This works because once we reach this point no one
- * can either create new ordered extents nor create delayed iputs
- * through some other means.
+ * can create new ordered extents, but delayed iputs can still be added
+ * by a reclaim worker (see comments further below).
*
* Also note that btrfs_wait_ordered_roots() is not safe here, because
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@@ -4397,15 +4396,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_flush_workqueue(fs_info->endio_write_workers);
/* Ordered extents for free space inodes. */
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
+ /*
+ * Run delayed iputs in case an async reclaim worker is waiting for them
+ * to be run as mentioned above.
+ */
btrfs_run_delayed_iputs(fs_info);
- /* There should be no more workload to generate new delayed iputs. */
- set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
cancel_work_sync(&fs_info->async_reclaim_work);
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
cancel_work_sync(&fs_info->em_shrinker_work);
+ /*
+ * Run delayed iputs again because an async reclaim worker may have
+ * added new ones if it was flushing delalloc:
+ *
+ * shrink_delalloc() -> btrfs_start_delalloc_roots() ->
+ * start_delalloc_inodes() -> btrfs_add_delayed_iput()
+ */
+ btrfs_run_delayed_iputs(fs_info);
+
+ /* There should be no more workload to generate new delayed iputs. */
+ set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
+
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f5b28b5c4908..91d9cb5ff401 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,7 +79,7 @@ enum {
* single word in a bitmap may straddle two pages in the extent buffer.
*/
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BYTE_MASK ((1U << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 39c6b96a4c25..64af363f36dd 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1099,11 +1099,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
if (ret < 0)
goto out_locked;
- ASSERT(ret == 0);
+ /*
+ * If ret is 1 (no key found), it means this is an empty block group,
+ * without any extents allocated from it and there's no block group
+ * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
+ * because we are using the block group tree feature, so block group
+ * items are stored in the block group tree. It also means there are no
+ * extents allocated for block groups with a start offset beyond this
+ * block group's end offset (this is the last, highest, block group).
+ */
+ if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
+ ASSERT(ret == 0);
start = block_group->start;
end = block_group->start + block_group->length;
- while (1) {
+ while (ret == 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@@ -1133,8 +1143,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
ret = btrfs_next_item(extent_root, path);
if (ret < 0)
goto out_locked;
- if (ret)
- break;
}
if (start < end) {
ret = __add_to_free_space_tree(trans, block_group, path2,
@@ -1217,6 +1225,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
{
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
+ struct rb_node *node;
int nr;
int ret;
@@ -1245,6 +1254,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
+ node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *bg;
+
+ bg = rb_entry(node, struct btrfs_block_group, cache_node);
+ clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
+ node = rb_next(node);
+ cond_resched();
+ }
+
return 0;
}
@@ -1334,12 +1353,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
+
+ if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
+ &block_group->runtime_flags))
+ goto next;
+
ret = populate_free_space_tree(trans, block_group);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
+next:
if (btrfs_should_end_transaction(trans)) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(free_space_root, 1);
@@ -1366,6 +1391,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
+ /*
+ * While rebuilding the free space tree we may allocate new metadata
+ * block groups while modifying the free space tree.
+ *
+ * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
+ * can use multiple transactions, every time btrfs_end_transaction() is
+ * called at btrfs_rebuild_free_space_tree() we finish the creation of
+ * new block groups by calling btrfs_create_pending_block_groups(), and
+ * that in turn calls us, through add_block_group_free_space(), to add
+ * a free space info item and a free space extent item for the block
+ * group.
+ *
+ * Then later btrfs_rebuild_free_space_tree() may find such new block
+ * groups and processes them with populate_free_space_tree(), which can
+ * fail with EEXIST since there are already items for the block group in
+ * the free space tree. Notice that we say "may find" because a new
+ * block group may be added to the block groups rbtree in a node before
+ * or after the block group currently being processed by the rebuild
+ * process. So signal the rebuild process to skip such new block groups
+ * if it finds them.
+ */
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
ret = add_new_free_space_info(trans, block_group, path);
if (ret)
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8a3f44302788..f18f4d59d389 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1171,7 +1171,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- 1 << BTRFS_ORDERED_COMPRESSED);
+ 1U << BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
@@ -1418,7 +1418,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- 1 << BTRFS_ORDERED_REGULAR);
+ 1U << BTRFS_ORDERED_REGULAR);
if (IS_ERR(ordered)) {
unlock_extent(&inode->io_tree, start,
start + cur_alloc_size - 1, &cached);
@@ -1999,8 +1999,8 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio
ordered = btrfs_alloc_ordered_extent(inode, file_pos, &nocow_args->file_extent,
is_prealloc
- ? (1 << BTRFS_ORDERED_PREALLOC)
- : (1 << BTRFS_ORDERED_NOCOW));
+ ? (1U << BTRFS_ORDERED_PREALLOC)
+ : (1U << BTRFS_ORDERED_NOCOW));
if (IS_ERR(ordered)) {
if (is_prealloc)
btrfs_drop_extent_map_range(inode, file_pos, end, false);
@@ -4724,7 +4724,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
int ret = 0;
struct btrfs_trans_handle *trans;
- u64 last_unlink_trans;
struct fscrypt_name fname;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
@@ -4750,6 +4749,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
goto out_notrans;
}
+ /*
+ * Propagate the last_unlink_trans value of the deleted dir to its
+ * parent directory. This is to prevent an unrecoverable log tree in the
+ * case we do something like this:
+ * 1) create dir foo
+ * 2) create snapshot under dir foo
+ * 3) delete the snapshot
+ * 4) rmdir foo
+ * 5) mkdir foo
+ * 6) fsync foo or some file inside foo
+ *
+ * This is because we can't unlink other roots when replaying the dir
+ * deletes for directory foo.
+ */
+ if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
+ btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
goto out;
@@ -4759,27 +4775,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (ret)
goto out;
- last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
-
/* now the directory is empty */
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
- if (!ret) {
+ if (!ret)
btrfs_i_size_write(BTRFS_I(inode), 0);
- /*
- * Propagate the last_unlink_trans value of the deleted dir to
- * its parent directory. This is to prevent an unrecoverable
- * log tree in the case we do something like this:
- * 1) create dir foo
- * 2) create snapshot under dir foo
- * 3) delete the snapshot
- * 4) rmdir foo
- * 5) mkdir foo
- * 6) fsync foo or some file inside foo
- */
- if (last_unlink_trans >= trans->transid)
- BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
- }
out:
btrfs_end_transaction(trans);
out_notrans:
@@ -7979,6 +7979,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret;
int ret2;
bool need_abort = false;
+ bool logs_pinned = false;
struct fscrypt_name old_fname, new_fname;
struct fscrypt_str *old_name, *new_name;
@@ -8102,6 +8103,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
inode_inc_iversion(new_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
+ new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not for
+ * root entries) pin the log early to prevent any concurrent
+ * task from logging the directory after we removed the old
+ * entries and before we add the new entries, otherwise that
+ * task can sync a log without any entry for the inodes we are
+ * renaming and therefore replaying that log, if a power failure
+ * happens after syncing the log, would result in deleting the
+ * inodes.
+ *
+ * If the rename affects two different directories, we want to
+ * make sure the that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@@ -8173,30 +8199,23 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode)->dir_index = new_idx;
/*
- * Now pin the logs of the roots. We do it to ensure that no other task
- * can sync the logs while we are in progress with the rename, because
- * that could result in an inconsistency in case any of the inodes that
- * are part of this rename operation were logged before.
+ * Do the log updates for all inodes.
+ *
+ * If either entry is for a root we don't need to update the logs since
+ * we've called btrfs_set_log_full_commit() before.
*/
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
- btrfs_pin_log_trans(dest);
-
- /* Do the log updates for all inodes. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned) {
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
old_rename_ctx.index, new_dentry->d_parent);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
new_rename_ctx.index, old_dentry->d_parent);
+ }
- /* Now unpin the logs. */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+out_fail:
+ if (logs_pinned) {
btrfs_end_log_trans(root);
- if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
btrfs_end_log_trans(dest);
-out_fail:
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -8246,6 +8265,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
struct fscrypt_name old_fname, new_fname;
+ bool logs_pinned = false;
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -8380,6 +8400,29 @@ static int btrfs_rename(struct mnt_idmap *idmap,
inode_inc_iversion(old_inode);
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * If we are renaming in the same directory (and it's not a
+ * root entry) pin the log to prevent any concurrent task from
+ * logging the directory after we removed the old entry and
+ * before we add the new entry, otherwise that task can sync
+ * a log without any entry for the inode we are renaming and
+ * therefore replaying that log, if a power failure happens
+ * after syncing the log, would result in deleting the inode.
+ *
+ * If the rename affects two different directories, we want to
+ * make sure the that there's no log commit that contains
+ * updates for only one of the directories but not for the
+ * other.
+ *
+ * If we are renaming an entry for a root, we don't care about
+ * log updates since we called btrfs_set_log_full_commit().
+ */
+ btrfs_pin_log_trans(root);
+ btrfs_pin_log_trans(dest);
+ logs_pinned = true;
+ }
+
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), true);
@@ -8444,7 +8487,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_inode->i_nlink == 1)
BTRFS_I(old_inode)->dir_index = index;
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
+ if (logs_pinned)
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
rename_ctx.index, new_dentry->d_parent);
@@ -8460,6 +8503,10 @@ static int btrfs_rename(struct mnt_idmap *idmap,
}
}
out_fail:
+ if (logs_pinned) {
+ btrfs_end_log_trans(root);
+ btrfs_end_log_trans(dest);
+ }
ret2 = btrfs_end_transaction(trans);
ret = ret ? ret : ret2;
out_notrans:
@@ -9733,8 +9780,8 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
free_extent_map(em);
ordered = btrfs_alloc_ordered_extent(inode, start, &file_extent,
- (1 << BTRFS_ORDERED_ENCODED) |
- (1 << BTRFS_ORDERED_COMPRESSED));
+ (1U << BTRFS_ORDERED_ENCODED) |
+ (1U << BTRFS_ORDERED_COMPRESSED));
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 63aeacc54945..b70ef4455610 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out;
}
+ btrfs_record_new_subvolume(trans, BTRFS_I(dir));
+
ret = btrfs_create_new_inode(trans, &new_inode_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- btrfs_record_new_subvolume(trans, BTRFS_I(dir));
-
d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 03c945711003..1d7ddd664c0d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -153,9 +153,10 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
struct btrfs_ordered_extent *entry;
int ret;
u64 qgroup_rsv = 0;
+ const bool is_nocow = (flags &
+ ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)));
- if (flags &
- ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
+ if (is_nocow) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
if (ret < 0)
@@ -170,8 +171,13 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
return ERR_PTR(ret);
}
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
- if (!entry)
+ if (!entry) {
+ if (!is_nocow)
+ btrfs_qgroup_free_refroot(inode->root->fs_info,
+ btrfs_root_id(inode->root),
+ qgroup_rsv, BTRFS_QGROUP_RSV_DATA);
return ERR_PTR(-ENOMEM);
+ }
entry->file_offset = file_offset;
entry->num_bytes = num_bytes;
@@ -253,7 +259,7 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
* @disk_bytenr: Offset of extent on disk.
* @disk_num_bytes: Size of extent on disk.
* @offset: Offset into unencoded data where file data starts.
- * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @flags: Flags specifying type of extent (1U << BTRFS_ORDERED_*).
* @compress_type: Compression algorithm used for data.
*
* Most of these parameters correspond to &struct btrfs_file_extent_item. The
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index cdd373c27784..7285bf7926e3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -200,8 +200,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
struct btrfs_stripe_hash_table *x;
struct btrfs_stripe_hash *cur;
struct btrfs_stripe_hash *h;
- int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
- int i;
+ unsigned int num_entries = 1U << BTRFS_STRIPE_HASH_TABLE_BITS;
if (info->stripe_hash_table)
return 0;
@@ -222,7 +221,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
h = table->table;
- for (i = 0; i < num_entries; i++) {
+ for (unsigned int i = 0; i < num_entries; i++) {
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 74aca7180a5a..400574c6e82e 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -14,9 +14,9 @@
#include "../disk-io.h"
#include "../btrfs_inode.h"
-#define PROCESS_UNLOCK (1 << 0)
-#define PROCESS_RELEASE (1 << 1)
-#define PROCESS_TEST_LOCKED (1 << 2)
+#define PROCESS_UNLOCK (1U << 0)
+#define PROCESS_RELEASE (1U << 1)
+#define PROCESS_TEST_LOCKED (1U << 2)
static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f5af11565b87..e05140ce95be 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
unsigned int nofs_flag;
struct btrfs_inode *inode;
+ /* Only meant to be called for subvolume roots and not for log roots. */
+ ASSERT(is_fstree(btrfs_root_id(root)));
+
/*
* We're holding a transaction handle whether we are logging or
* replaying a log tree, so we must make sure NOFS semantics apply
@@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
return 0;
}
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
- u64 objectid)
-{
- struct btrfs_inode *inode;
-
- inode = btrfs_iget_logging(objectid, root);
- if (IS_ERR(inode))
- return NULL;
- return inode;
-}
-
/* replays a single extent in 'eb' at 'slot' with 'key' into the
* subvolume 'root'. path is released on entry and should be released
* on exit.
@@ -668,15 +656,12 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
extent_end = ALIGN(start + size,
fs_info->sectorsize);
} else {
- ret = 0;
- goto out;
+ return 0;
}
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- ret = -EIO;
- goto out;
- }
+ inode = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
/*
* first check to see if we already have this extent in the
@@ -948,9 +933,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -961,7 +947,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
ret = unlink_inode_for_log_replay(trans, dir, inode, &name);
out:
kfree(name.name);
- iput(&inode->vfs_inode);
+ if (inode)
+ iput(&inode->vfs_inode);
return ret;
}
@@ -1072,7 +1059,9 @@ again:
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = parent_objectid;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret == 0) {
+ if (ret < 0) {
+ return ret;
+ } else if (ret == 0) {
struct btrfs_inode_ref *victim_ref;
unsigned long ptr;
unsigned long ptr_end;
@@ -1145,13 +1134,13 @@ again:
struct fscrypt_str victim_name;
extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ victim_name.len = btrfs_inode_extref_name_len(leaf, extref);
if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
goto next;
ret = read_alloc_one_name(leaf, &extref->name,
- btrfs_inode_extref_name_len(leaf, extref),
- &victim_name);
+ victim_name.len, &victim_name);
if (ret)
return ret;
@@ -1166,18 +1155,18 @@ again:
kfree(victim_name.name);
return ret;
} else if (!ret) {
- ret = -ENOENT;
- victim_parent = read_one_inode(root,
- parent_objectid);
- if (victim_parent) {
+ victim_parent = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(victim_parent)) {
+ ret = PTR_ERR(victim_parent);
+ } else {
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
ret = unlink_inode_for_log_replay(trans,
victim_parent,
inode, &victim_name);
+ iput(&victim_parent->vfs_inode);
}
- iput(&victim_parent->vfs_inode);
kfree(victim_name.name);
if (ret)
return ret;
@@ -1314,9 +1303,9 @@ again:
struct btrfs_inode *dir;
btrfs_release_path(path);
- dir = read_one_inode(root, parent_id);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_id, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
kfree(name.name);
goto out;
}
@@ -1388,15 +1377,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* copy the back ref in. The link count fixup code will take
* care of the rest
*/
- dir = read_one_inode(root, parent_objectid);
- if (!dir) {
- ret = -ENOENT;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
goto out;
}
- inode = read_one_inode(root, inode_objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(inode_objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -1408,11 +1399,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* parent object can change from one array
* item to another.
*/
- if (!dir)
- dir = read_one_inode(root, parent_objectid);
if (!dir) {
- ret = -ENOENT;
- goto out;
+ dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ dir = NULL;
+ goto out;
+ }
}
} else {
ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
@@ -1681,9 +1674,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
btrfs_release_path(path);
- inode = read_one_inode(root, key.offset);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.offset, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
@@ -1719,9 +1712,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode;
struct inode *vfs_inode;
- inode = read_one_inode(root, objectid);
- if (!inode)
- return -EIO;
+ inode = btrfs_iget_logging(objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
vfs_inode = &inode->vfs_inode;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
@@ -1760,14 +1753,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir;
int ret;
- inode = read_one_inode(root, location->objectid);
- if (!inode)
- return -ENOENT;
+ inode = btrfs_iget_logging(location->objectid, root);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- dir = read_one_inode(root, dirid);
- if (!dir) {
+ dir = btrfs_iget_logging(dirid, root);
+ if (IS_ERR(dir)) {
iput(&inode->vfs_inode);
- return -EIO;
+ return PTR_ERR(dir);
}
ret = btrfs_add_link(trans, dir, inode, name, 1, index);
@@ -1844,9 +1837,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
bool update_size = true;
bool name_added = false;
- dir = read_one_inode(root, key->objectid);
- if (!dir)
- return -EIO;
+ dir = btrfs_iget_logging(key->objectid, root);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
if (ret)
@@ -2146,9 +2139,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(location.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
goto out;
}
@@ -2300,14 +2294,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
if (!log_path)
return -ENOMEM;
- dir = read_one_inode(root, dirid);
- /* it isn't an error if the inode isn't there, that can happen
- * because we replay the deletes before we copy in the inode item
- * from the log
+ dir = btrfs_iget_logging(dirid, root);
+ /*
+ * It isn't an error if the inode isn't there, that can happen because
+ * we replay the deletes before we copy in the inode item from the log.
*/
- if (!dir) {
+ if (IS_ERR(dir)) {
btrfs_free_path(log_path);
- return 0;
+ ret = PTR_ERR(dir);
+ if (ret == -ENOENT)
+ ret = 0;
+ return ret;
}
range_start = 0;
@@ -2466,9 +2463,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
struct btrfs_inode *inode;
u64 from;
- inode = read_one_inode(root, key.objectid);
- if (!inode) {
- ret = -EIO;
+ inode = btrfs_iget_logging(key.objectid, root);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
break;
}
from = ALIGN(i_size_read(&inode->vfs_inode),
@@ -7447,6 +7444,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
* full log sync.
* Also we don't need to worry with renames, since btrfs_rename() marks the log
* for full commit when renaming a subvolume.
+ *
+ * Must be called before creating the subvolume entry in its parent directory.
*/
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
struct btrfs_inode *dir)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8e6b6fed7429..9ad222c4391f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3281,6 +3281,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
device->bytes_used - dev_extent_len);
atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
btrfs_clear_space_info_full(fs_info);
+
+ if (list_empty(&device->post_commit_list)) {
+ list_add_tail(&device->post_commit_list,
+ &trans->transaction->dev_update_list);
+ }
+
mutex_unlock(&fs_info->chunk_mutex);
}
}
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 3541efa765c7..94c19331823a 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -24,7 +24,7 @@
#include "super.h"
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
-#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
+#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MIN_LEVEL -15
#define ZSTD_BTRFS_MAX_LEVEL 15
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index c08e4a66ac07..3e0576d9db1d 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -347,8 +347,6 @@ int __cachefiles_write(struct cachefiles_object *object,
default:
ki->was_async = false;
cachefiles_write_complete(&ki->iocb, ret);
- if (ret > 0)
- ret = 0;
break;
}
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index d9bc67176128..a7ed86fa98bb 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -83,10 +83,8 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len);
ret = __cachefiles_write(object, file, pos, iter, NULL, NULL);
- if (!ret) {
- ret = len;
+ if (ret > 0)
kiocb->ki_pos += ret;
- }
out:
fput(file);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 851d70200c6b..a7254cab44cc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2616,7 +2616,7 @@ static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
s32 stripe_unit = ci->i_layout.stripe_unit;
s32 stripe_count = ci->i_layout.stripe_count;
s32 object_size = ci->i_layout.object_size;
- u64 object_set_size = object_size * stripe_count;
+ u64 object_set_size = (u64) object_size * stripe_count;
u64 nearly, t;
/* round offset up to next period boundary */
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0486e9b68bc6..f681814fe8bb 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -387,10 +387,16 @@ static int efivarfs_reconfigure(struct fs_context *fc)
return 0;
}
+static void efivarfs_free(struct fs_context *fc)
+{
+ kfree(fc->s_fs_info);
+}
+
static const struct fs_context_operations efivarfs_context_ops = {
.get_tree = efivarfs_get_tree,
.parse_param = efivarfs_parse_param,
.reconfigure = efivarfs_reconfigure,
+ .free = efivarfs_free,
};
struct efivarfs_ctx {
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 2409d2ab0c28..33cb0a7330d2 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -213,9 +213,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/*
* bit 30: I/O error occurred on this folio
+ * bit 29: CPU has dirty data in D-cache (needs aliasing handling);
* bit 0 - 29: remaining parts to complete this folio
*/
-#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+#define EROFS_ONLINEFOLIO_EIO 30
+#define EROFS_ONLINEFOLIO_DIRTY 29
void erofs_onlinefolio_init(struct folio *folio)
{
@@ -232,19 +234,23 @@ void erofs_onlinefolio_split(struct folio *folio)
atomic_inc((atomic_t *)&folio->private);
}
-void erofs_onlinefolio_end(struct folio *folio, int err)
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
int orig, v;
do {
orig = atomic_read((atomic_t *)&folio->private);
- v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ DBG_BUGON(orig <= 0);
+ v = dirty << EROFS_ONLINEFOLIO_DIRTY;
+ v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
- if (v & ~EROFS_ONLINEFOLIO_EIO)
+ if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
return;
folio->private = 0;
- folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+ if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
+ flush_dcache_folio(folio);
+ folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -350,11 +356,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
+ trace_erofs_read_folio(folio, true);
+
return iomap_read_folio(folio, &erofs_iomap_ops);
}
static void erofs_readahead(struct readahead_control *rac)
{
+ trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
+ readahead_count(rac), true);
+
return iomap_readahead(rac, &erofs_iomap_ops);
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index bf62e2836b60..358061d7b660 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
cur = min(cur, rq->outputsize);
if (cur && rq->out[0]) {
kin = kmap_local_page(rq->in[nrpages_in - 1]);
- if (rq->out[0] == rq->in[nrpages_in - 1]) {
+ if (rq->out[0] == rq->in[nrpages_in - 1])
memmove(kin + rq->pageofs_out, kin + pi, cur);
- flush_dcache_page(rq->out[0]);
- } else {
+ else
memcpy_to_page(rq->out[0], rq->pageofs_out,
kin + pi, cur);
- }
kunmap_local(kin);
}
rq->outputsize -= cur;
@@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
DBG_BUGON(no >= nrpages_out);
cnt = min(insz - pi, PAGE_SIZE - po);
- if (rq->out[no] == rq->in[ni]) {
+ if (rq->out[no] == rq->in[ni])
memmove(kin + po,
kin + rq->pageofs_in + pi, cnt);
- flush_dcache_page(rq->out[no]);
- } else if (rq->out[no]) {
+ else if (rq->out[no])
memcpy_to_page(rq->out[no], po,
kin + rq->pageofs_in + pi, cnt);
- }
pi += cnt;
} while (pi < insz);
kunmap_local(kin);
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 60c7cc4c105c..da1304a9bb43 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
} else {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
- erofs_onlinefolio_end(fi.folio, ret);
+ erofs_onlinefolio_end(fi.folio, ret, false);
}
}
bio_uninit(&rq->bio);
@@ -158,7 +158,7 @@ io_retry:
}
cur += len;
}
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
@@ -180,7 +180,7 @@ static void erofs_fileio_readahead(struct readahead_control *rac)
struct folio *folio;
int err;
- trace_erofs_readpages(inode, readahead_index(rac),
+ trace_erofs_readahead(inode, readahead_index(rac),
readahead_count(rac), true);
while ((folio = readahead_folio(rac))) {
err = erofs_fileio_scan_folio(&io, folio);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 4ac188d5d894..2e8823653c2e 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -314,10 +314,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as,
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED 0x0008
/* Located in the special packed inode */
-#define EROFS_MAP_FRAGMENT 0x0010
+#define __EROFS_MAP_FRAGMENT 0x0010
/* The extent refers to partial decompressed data */
#define EROFS_MAP_PARTIAL_REF 0x0020
+#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT)
+
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -389,7 +391,7 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
-void erofs_onlinefolio_end(struct folio *folio, int err);
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index b8e6b76c23d5..d21ae4802c7f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1003,7 +1003,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, end);
tight = false;
- } else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ } else if (map->m_flags & __EROFS_MAP_FRAGMENT) {
erofs_off_t fpos = offset + cur - map->m_la;
err = z_erofs_read_fragment(inode->i_sb, folio, cur,
@@ -1060,7 +1060,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
tight = (bs == PAGE_SIZE);
}
} while ((end = cur) > 0);
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
@@ -1165,7 +1165,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
cur += len;
}
kunmap_local(dst);
- erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
+ erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true);
list_del(p);
kfree(bvi);
}
@@ -1324,7 +1324,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- erofs_onlinefolio_end(page_folio(page), err);
+ erofs_onlinefolio_end(page_folio(page), err, true);
continue;
}
if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
@@ -1855,13 +1855,12 @@ static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
- struct folio *head = NULL, *folio;
unsigned int nrpages = readahead_count(rac);
+ struct folio *head = NULL, *folio;
int err;
+ trace_erofs_readahead(inode, readahead_index(rac), nrpages, false);
z_erofs_pcluster_readmore(&f, rac, true);
- nrpages = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nrpages, false);
while ((folio = readahead_folio(rac))) {
folio->private = head;
head = folio;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 0bebc6e3a4d7..f1a15ff22147 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
- map->m_flags = EROFS_MAP_MAPPED |
- EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
return 0;
}
initial_lcn = ofs >> lclusterbits;
@@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
goto unmap_out;
}
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
- map->m_flags |= EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
} else {
map->m_pa = erofs_pos(sb, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -617,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (lstart < lend) {
map->m_la = lstart;
if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
- map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
vi->z_fragmentoff = map->m_plen;
if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
@@ -797,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
- iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+ iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..0fbf5dfedb24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -883,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
kfree_rcu(epi, rcu);
percpu_counter_dec(&ep->user->epoll_watches);
- return ep_refcount_dec_and_test(ep);
+ return true;
}
/*
@@ -891,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
*/
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
{
- WARN_ON_ONCE(__ep_remove(ep, epi, false));
+ if (__ep_remove(ep, epi, false))
+ WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
}
static void ep_clear_and_put(struct eventpoll *ep)
{
struct rb_node *rbp, *next;
struct epitem *epi;
- bool dispose;
/* We need to release all tasks waiting for these file */
if (waitqueue_active(&ep->poll_wait))
@@ -931,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep)
cond_resched();
}
- dispose = ep_refcount_dec_and_test(ep);
mutex_unlock(&ep->mtx);
-
- if (dispose)
+ if (ep_refcount_dec_and_test(ep))
ep_free(ep);
}
@@ -1137,7 +1135,7 @@ again:
dispose = __ep_remove(ep, epi, true);
mutex_unlock(&ep->mtx);
- if (dispose)
+ if (dispose && ep_refcount_dec_and_test(ep))
ep_free(ep);
goto again;
}
diff --git a/fs/exec.c b/fs/exec.c
index 8e4ea5f1e64c..a18d5875c8d9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -111,6 +111,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
bool path_noexec(const struct path *path)
{
+ /* If it's an anonymous inode make sure that we catch any shenanigans. */
+ VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
+ !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
@@ -894,13 +897,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (IS_ERR(file))
return file;
+ if (path_noexec(&file->f_path))
+ return ERR_PTR(-EACCES);
+
/*
* In the past the regular type check was here. It moved to may_open() in
* 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
* an invariant that all non-regular files error out before we get here.
*/
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
- path_noexec(&file->f_path))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
return ERR_PTR(-EACCES);
err = exe_file_deny_write_access(file);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index abbcbb5865a3..f2de3c886c08 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,6 +35,17 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
+static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+{
+ loff_t old_size = i_size_read(inode);
+
+ if (old_size >= new_size)
+ return;
+
+ /* zero or drop pages only in range of [old_size, new_size] */
+ truncate_pagecache(inode, old_size);
+}
+
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
+ filemap_invalidate_unlock(inode->i_mapping);
+
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
+
folio_lock(folio);
if (unlikely(folio->mapping != inode->i_mapping ||
folio_pos(folio) > i_size_read(inode) ||
@@ -1106,6 +1122,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ if (attr->ia_size > old_size)
+ f2fs_zero_post_eof_page(inode, attr->ia_size);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1224,6 +1242,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1507,6 +1529,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
@@ -1628,6 +1652,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
+ filemap_invalidate_lock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1759,6 +1787,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
+
+ f2fs_zero_post_eof_page(inode, offset + len);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1816,6 +1846,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
@@ -4846,6 +4880,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
err = file_modified(file);
if (err)
return err;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
+ filemap_invalidate_unlock(inode->i_mapping);
return count;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bc510c91f3eb..86dd30eb50b1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1806,26 +1806,32 @@ static int f2fs_statfs_project(struct super_block *sb,
limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
dquot->dq_dqb.dqb_bhardlimit);
- if (limit)
- limit >>= sb->s_blocksize_bits;
+ limit >>= sb->s_blocksize_bits;
+
+ if (limit) {
+ uint64_t remaining = 0;
- if (limit && buf->f_blocks > limit) {
curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
- buf->f_blocks = limit;
- buf->f_bfree = buf->f_bavail =
- (buf->f_blocks > curblock) ?
- (buf->f_blocks - curblock) : 0;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
}
limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
dquot->dq_dqb.dqb_ihardlimit);
- if (limit && buf->f_files > limit) {
- buf->f_files = limit;
- buf->f_ffree =
- (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
- (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
}
spin_unlock(&dquot->dq_dqb_lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83ac192e7fdd..fa90309030e2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1946,6 +1946,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int err;
bool trust_local_cmtime = is_wb;
bool fault_blocked = false;
+ u64 attr_version;
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
@@ -2030,6 +2031,8 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
inarg.valid |= FATTR_KILL_SUIDGID;
}
+
+ attr_version = fuse_get_attr_version(fm->fc);
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
err = fuse_simple_request(fm, &args);
if (err) {
@@ -2055,6 +2058,14 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
/* FIXME: clear I_DIRTY_SYNC? */
}
+ if (fi->attr_version > attr_version) {
+ /*
+ * Apply attributes, for example for fsnotify_change(), but set
+ * attribute timeout to zero.
+ */
+ outarg.attr_valid = outarg.attr_valid_nsec = 0;
+ }
+
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode), 0);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd48e8d37f2e..fa6d6b728ffc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -9,6 +9,7 @@
#include "fuse_i.h"
#include "dev_uring_i.h"
+#include <linux/dax.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
@@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode)
/* Will write inode on close/munmap and in all other dirtiers */
WARN_ON(inode->i_state & I_DIRTY_INODE);
+ if (FUSE_IS_DAX(inode))
+ dax_break_layout_final(inode);
+
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
if (inode->i_sb->s_flags & SB_ACTIVE) {
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d5da9817df9b..33e6a620c103 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1440,9 +1440,16 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_op = &page_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
- } else
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ } else {
+ printk(KERN_DEBUG "ISOFS: Invalid file type 0%04o for inode %lu.\n",
+ inode->i_mode, inode->i_ino);
+ ret = -EIO;
+ goto fail;
+ }
ret = 0;
out:
diff --git a/fs/libfs.c b/fs/libfs.c
index e28da9574a65..14e0e9b18c8e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1648,12 +1648,10 @@ struct inode *alloc_anon_inode(struct super_block *s)
*/
inode->i_state = I_DIRTY;
/*
- * Historically anonymous inodes didn't have a type at all and
- * userspace has come to rely on this. Internally they're just
- * regular files but S_IFREG is masked off when reporting
- * information to userspace.
+ * Historically anonymous inodes don't have a type at all and
+ * userspace has come to rely on this.
*/
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE | S_ANON_INODE;
diff --git a/fs/namei.c b/fs/namei.c
index 84a0e0b0111c..8e165c99feee 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3464,7 +3464,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return -EACCES;
break;
default:
- VFS_BUG_ON_INODE(1, inode);
+ VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
diff --git a/fs/namespace.c b/fs/namespace.c
index d6ac7e533b02..dfb72f827d4a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2765,14 +2765,14 @@ static int attach_recursive_mnt(struct mount *source_mnt,
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- if (q)
- mnt_change_mountpoint(child, smp, q);
/* Notice when we are propagating across user namespaces */
if (child->mnt_parent->mnt_ns->user_ns != user_ns)
lock_mnt_tree(child);
child->mnt.mnt_flags &= ~MNT_LOCKED;
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q)
+ mnt_change_mountpoint(child, smp, q);
commit_tree(child);
}
put_mountpoint(smp);
@@ -5307,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
kattr.kflags |= MOUNT_KATTR_RECURSE;
ret = wants_mount_setattr(uattr, usize, &kattr);
- if (ret < 0)
- return ret;
-
- if (ret) {
+ if (ret > 0) {
ret = do_mount_setattr(&file->f_path, &kattr);
- if (ret)
- return ret;
-
finish_mount_kattr(&kattr);
}
+ if (ret)
+ return ret;
}
fd = get_unused_fd_flags(flags & O_CLOEXEC);
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index dbb544e183d1..9f22ff890a8c 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -64,6 +64,7 @@ static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
return;
}
+ spin_lock(&inode->i_lock);
i_size_write(inode, pos);
#if IS_ENABLED(CONFIG_FSCACHE)
fscache_update_cookie(ctx->cache, NULL, &pos);
@@ -77,6 +78,7 @@ static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
DIV_ROUND_UP(pos, SECTOR_SIZE),
inode->i_blocks + add);
}
+ spin_unlock(&inode->i_lock);
}
/**
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index fa9a5bf3c6d5..3efa5894b2c0 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -14,13 +14,17 @@ static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
struct inode *inode = wreq->inode;
unsigned long long end = wreq->start + wreq->transferred;
- if (!wreq->error &&
- i_size_read(inode) < end) {
+ if (wreq->error || end <= i_size_read(inode))
+ return;
+
+ spin_lock(&inode->i_lock);
+ if (end > i_size_read(inode)) {
if (wreq->netfs_ops->update_i_size)
wreq->netfs_ops->update_i_size(inode, end);
else
i_size_write(inode, end);
}
+ spin_unlock(&inode->i_lock);
}
/*
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 43b67a28a8fa..8b1c11ef32aa 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
static int netfs_collect_in_app(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
- bool need_collect = false, inactive = true;
+ bool need_collect = false, inactive = true, done = true;
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_recollect);
+ return 1; /* Done */
+ }
for (int i = 0; i < NR_IO_STREAMS; i++) {
struct netfs_io_subrequest *subreq;
@@ -400,9 +405,11 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
need_collect = true;
break;
}
+ if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
+ done = false;
}
- if (!need_collect && !inactive)
+ if (!need_collect && !inactive && !done)
return 0; /* Sleep */
__set_current_state(TASK_RUNNING);
@@ -423,8 +430,8 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
/*
* Wait for a request to complete, successfully or otherwise.
*/
-static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
- bool (*collector)(struct netfs_io_request *rreq))
+static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
{
DEFINE_WAIT(myself);
ssize_t ret;
@@ -440,6 +447,9 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+ cond_resched();
continue;
}
}
@@ -478,12 +488,12 @@ all_collected:
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_read_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
- return netfs_wait_for_request(rreq, netfs_write_collection);
+ return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}
/*
@@ -507,6 +517,10 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+ cond_resched();
continue;
}
}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index 5bbe906a551d..8097bc069c1d 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -110,6 +110,8 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
if (!creq->io_streams[1].avail)
goto cancel_put;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &creq->flags);
+ trace_netfs_copy2cache(rreq, creq);
trace_netfs_write(creq, netfs_write_trace_copy_to_cache);
netfs_stat(&netfs_n_wh_copy_to_cache);
rreq->copy_to_cache = creq;
@@ -154,6 +156,9 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
netfs_issue_write(creq, &creq->io_streams[1]);
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_end_copy_to_cache);
+ if (list_empty_careful(&creq->io_streams[1].subrequests))
+ netfs_wake_collector(creq);
netfs_put_request(creq, netfs_rreq_trace_put_return);
creq->copy_to_cache = NULL;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 9d1d8a8bab72..7158657061e9 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -153,7 +153,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_split);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e6909cafab68..4bea008dbebd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1115,32 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
- switch (task->tk_status) {
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
+ switch (op_status) {
+ case NFS4_OK:
+ case NFS4ERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFS4ERR_BADSESSION:
+ case NFS4ERR_BADSLOT:
+ case NFS4ERR_BAD_HIGH_SLOT:
+ case NFS4ERR_DEADSESSION:
+ case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case NFS4ERR_SEQ_FALSE_RETRY:
+ case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- break;
- case -NFS4ERR_DELAY:
- case -NFS4ERR_GRACE:
+ goto out_retry;
+ case NFS4ERR_DELAY:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ fallthrough;
+ case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
- break;
- case -NFS4ERR_RETRY_UNCACHED_REP:
- break;
+ goto out_retry;
+ case NFS4ERR_RETRY_UNCACHED_REP:
+ goto out_retry;
/* Invalidate Layout errors */
- case -NFS4ERR_PNFS_NO_LAYOUT:
- case -ESTALE: /* mapped NFS4ERR_STALE */
- case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
- case -EISDIR: /* mapped NFS4ERR_ISDIR */
- case -NFS4ERR_FHEXPIRED:
- case -NFS4ERR_WRONG_TYPE:
+ case NFS4ERR_PNFS_NO_LAYOUT:
+ case NFS4ERR_STALE:
+ case NFS4ERR_BADHANDLE:
+ case NFS4ERR_ISDIR:
+ case NFS4ERR_FHEXPIRED:
+ case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@@ -1153,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
+ default:
+ break;
+ }
+
+ switch (task->tk_status) {
/* RPC connection errors */
case -ENETDOWN:
case -ENETUNREACH:
@@ -1172,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
- fallthrough;
+ break;
default:
- if (ff_layout_avoid_mds_available_ds(lseg))
- return -NFS4ERR_RESET_TO_PNFS;
-reset:
- dprintk("%s Retry through MDS. Error %d\n", __func__,
- task->tk_status);
- return -NFS4ERR_RESET_TO_MDS;
+ break;
}
+
+ if (ff_layout_avoid_mds_available_ds(lseg))
+ return -NFS4ERR_RESET_TO_PNFS;
+reset:
+ dprintk("%s Retry through MDS. Error %d\n", __func__,
+ task->tk_status);
+ return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+ u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+ switch (op_status) {
+ case NFS_OK:
+ case NFSERR_NXIO:
+ break;
+ case NFSERR_PERM:
+ if (!task->tk_xprt)
+ break;
+ xprt_force_disconnect(task->tk_xprt);
+ goto out_retry;
+ case NFSERR_ACCES:
+ case NFSERR_BADHANDLE:
+ case NFSERR_FBIG:
+ case NFSERR_IO:
+ case NFSERR_NOSPC:
+ case NFSERR_ROFS:
+ case NFSERR_STALE:
+ goto out_reset_to_pnfs;
+ case NFSERR_JUKEBOX:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ goto out_retry;
+ default:
+ break;
+ }
+
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@@ -1216,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
+out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@@ -1226,6 +1272,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
+ u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@@ -1244,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
- return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
- case 4:
- return ff_layout_async_handle_error_v4(task, state, clp,
+ return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
+ case 4:
+ return ff_layout_async_handle_error_v4(task, op_status, state,
+ clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@@ -1300,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
+ case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1332,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
trace_ff_layout_read_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1505,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
trace_ff_layout_write_error(hdr, task->tk_status);
}
- err = ff_layout_async_handle_error(task, hdr->args.context->state,
+ err = ff_layout_async_handle_error(task, hdr->res.op_status,
+ hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@@ -1554,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
trace_ff_layout_commit_error(data, task->tk_status);
}
- err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
- data->lseg, data->ds_commit_index);
+ err = ff_layout_async_handle_error(task, data->res.op_status,
+ NULL, data->ds_clp, data->lseg,
+ data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 119e447758b9..a2fa6bc4d74e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -557,6 +557,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
+ else
+ set_nlink(inode, 1);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
@@ -633,6 +635,34 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
}
}
+static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr)
+{
+ unsigned int cache_flags = 0;
+
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+ struct timespec64 now;
+ int updated = 0;
+
+ now = inode_set_ctime_current(inode);
+ if (!timespec64_equal(&now, &ctime))
+ updated |= S_CTIME;
+
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
+ if (!timespec64_equal(&now, &mtime))
+ updated |= S_MTIME;
+
+ inode_maybe_inc_iversion(inode, updated);
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ inode_set_atime_to_ts(inode, attr->ia_atime);
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
{
enum file_time_flags time_flags = 0;
@@ -701,14 +731,27 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
spin_lock(&inode->i_lock);
- nfs_update_timestamps(inode, attr->ia_valid);
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ nfs_set_timestamps_to_ts(inode, attr);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET|
+ ATTR_ATIME|ATTR_ATIME_SET);
+ } else {
+ nfs_update_timestamps(inode, attr->ia_valid);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME);
+ }
spin_unlock(&inode->i_lock);
- attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
} else if (nfs_have_delegated_atime(inode) &&
attr->ia_valid & ATTR_ATIME &&
!(attr->ia_valid & ATTR_MTIME)) {
- nfs_update_delegated_atime(inode);
- attr->ia_valid &= ~ATTR_ATIME;
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ spin_lock(&inode->i_lock);
+ nfs_set_timestamps_to_ts(inode, attr);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET);
+ } else {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
+ }
}
/* Optimization: if the end result is no change, don't RPC */
@@ -2546,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
+ int err;
nfs_clients_init(net);
if (!rpc_proc_register(net, &nn->rpcstats)) {
- nfs_clients_exit(net);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_proc_rpc;
}
- return nfs_fs_proc_net_init(net);
+ err = nfs_fs_proc_net_init(net);
+ if (err)
+ goto err_proc_nfs;
+
+ return 0;
+
+err_proc_nfs:
+ rpc_proc_unregister(net, "nfs");
+err_proc_rpc:
+ nfs_clients_exit(net);
+ return err;
}
static void nfs_net_exit(struct net *net)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9db317e7dea1..2f5a6aa3fd48 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -325,14 +325,14 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
if (nfs_have_delegated_mtime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
- dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
if (!(cache_validity & NFS_INO_INVALID_MTIME))
- dst[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+ dst[1] &= ~(FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET);
if (!(cache_validity & NFS_INO_INVALID_CTIME))
- dst[1] &= ~FATTR4_WORD1_TIME_METADATA;
+ dst[1] &= ~(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY_SET);
} else if (nfs_have_delegated_atime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
- dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
}
}
@@ -6220,6 +6220,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen,
struct nfs_server *server = NFS_SERVER(inode);
int ret;
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
if (!nfs4_server_supports_acls(server, type))
return -EOPNOTSUPP;
ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
@@ -6294,6 +6296,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf,
{
struct nfs4_exception exception = { };
int err;
+
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
do {
err = __nfs4_proc_set_acl(inode, buf, buflen, type);
trace_nfs4_set_acl(inode, err);
@@ -10861,7 +10866,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2, error3;
+ ssize_t error, error2, error3, error4;
size_t left = size;
error = generic_listxattr(dentry, list, left);
@@ -10884,8 +10889,16 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
return error3;
+ if (list) {
+ list += error3;
+ left -= error3;
+ }
+
+ error4 = security_inode_listsecurity(d_inode(dentry), list, left);
+ if (error4 < 0)
+ return error4;
- error += error2 + error3;
+ error += error2 + error3 + error4;
if (size && error > size)
return -ERANGE;
return error;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3adb7d0dbec7..1a7ec68bde15 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
if (atomic_dec_and_test(&lo->plh_outstanding) &&
- test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) {
+ smp_mb__after_atomic();
wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
+ }
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6613b8fcceb0..5cf7328d5360 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -472,11 +472,18 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
- } else {
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &nilfs_special_inode_operations;
init_special_inode(
inode, inode->i_mode,
huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
+ } else {
+ nilfs_error(sb,
+ "invalid file type bits in mode 0%o for inode %lu",
+ inode->i_mode, ino);
+ err = -EIO;
+ goto failed_unmap;
}
nilfs_ifile_unmap_inode(raw_inode);
brelse(bh);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index c4cdaf5fa7ed..9fb73bafd41d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -308,6 +308,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
+ error = file_f_owner_allocate(filp);
+ if (error)
+ goto out_err;
+
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
if (!new_dn_mark) {
@@ -315,10 +319,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
goto out_err;
}
- error = file_f_owner_allocate(filp);
- if (error)
- goto out_err;
-
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 0819c739cc2f..5d6b60d56c27 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -305,7 +305,9 @@ enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path)
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
- return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
+ struct inode *inode = d_inode(dentry);
+
+ return inode ? ovl_upperdentry_dereference(OVL_I(inode)) : NULL;
}
struct dentry *ovl_dentry_lower(struct dentry *dentry)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a3eb3b740f76..3604b616311c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode)
head = ei->sysctl;
if (head) {
- RCU_INIT_POINTER(ei->sysctl, NULL);
+ WRITE_ONCE(ei->sysctl, NULL);
proc_sys_evict_inode(inode, head);
}
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index cc9d74a06ff0..08b78150cdde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry,
struct ctl_table_header *head;
struct inode *inode;
- /* Although proc doesn't have negative dentries, rcu-walk means
- * that inode here can be NULL */
- /* AV: can it, indeed? */
- inode = d_inode_rcu(dentry);
- if (!inode)
- return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- head = rcu_dereference(PROC_I(inode)->sysctl);
+
+ // false positive is fine here - we'll recheck anyway
+ if (d_in_lookup(dentry))
+ return 0;
+
+ inode = d_inode_rcu(dentry);
+ // we just might have run into dentry in the middle of __dentry_kill()
+ if (!inode)
+ return 1;
+
+ head = READ_ONCE(PROC_I(inode)->sysctl);
return !head || !sysctl_is_seen(head);
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 994cde10e3f4..e57e323817e7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -2179,7 +2179,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_FILE;
}
- if (is_zero_pfn(pmd_pfn(pmd)))
+ if (is_huge_zero_pmd(pmd))
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index e03c890de0a0..c0196be0e65f 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -362,6 +362,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
c = 0;
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ struct smbdirect_socket_parameters *sp;
+#endif
+
/* channel info will be printed as a part of sessions below */
if (SERVER_IS_CHAN(server))
continue;
@@ -383,25 +387,26 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_printf(m, "\nSMBDirect transport not available");
goto skip_rdma;
}
+ sp = &server->smbd_conn->socket.parameters;
seq_printf(m, "\nSMBDirect (in hex) protocol version: %x "
"transport status: %x",
server->smbd_conn->protocol,
- server->smbd_conn->transport_status);
+ server->smbd_conn->socket.status);
seq_printf(m, "\nConn receive_credit_max: %x "
"send_credit_target: %x max_send_size: %x",
- server->smbd_conn->receive_credit_max,
- server->smbd_conn->send_credit_target,
- server->smbd_conn->max_send_size);
+ sp->recv_credit_max,
+ sp->send_credit_target,
+ sp->max_send_size);
seq_printf(m, "\nConn max_fragmented_recv_size: %x "
"max_fragmented_send_size: %x max_receive_size:%x",
- server->smbd_conn->max_fragmented_recv_size,
- server->smbd_conn->max_fragmented_send_size,
- server->smbd_conn->max_receive_size);
+ sp->max_fragmented_recv_size,
+ sp->max_fragmented_send_size,
+ sp->max_recv_size);
seq_printf(m, "\nConn keep_alive_interval: %x "
"max_readwrite_size: %x rdma_readwrite_threshold: %x",
- server->smbd_conn->keep_alive_interval,
- server->smbd_conn->max_readwrite_size,
+ sp->keepalive_interval_msec * 1000,
+ sp->max_read_write_size,
server->smbd_conn->rdma_readwrite_threshold);
seq_printf(m, "\nDebug count_get_receive_buffer: %x "
"count_put_receive_buffer: %x count_send_empty: %x",
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 0c80ca352f3f..eed29f043114 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
+ struct list_head rlist; /* reconnect list */
spinlock_t srv_lock; /* protect anything here that is not protected */
__u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */
@@ -773,8 +774,10 @@ struct TCP_Server_Info {
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* for signing, protected by srv_mutex */
__u32 reconnect_instance; /* incremented on each reconnect */
+ __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
+ unsigned long neg_start; /* when negotiate started (jiffies) */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
#define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */
#define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */
@@ -1301,6 +1304,7 @@ struct cifs_tcon {
bool use_persistent:1; /* use persistent instead of durable handles */
bool no_lease:1; /* Do not request leases on files or directories */
bool use_witness:1; /* use witness protocol */
+ bool dummy:1; /* dummy tcon used for reconnecting channels */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 1b79fe07476f..d9cf7db0ac35 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -597,7 +597,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 SecurityBlobLength;
__u32 Reserved;
__le32 Capabilities; /* see below */
@@ -616,7 +616,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 CaseInsensitivePasswordLength; /* ASCII password len */
__le16 CaseSensitivePasswordLength; /* Unicode password length*/
__u32 Reserved; /* see below */
@@ -654,7 +654,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 PasswordLength;
__u32 Reserved; /* encrypt key len and offset */
__le16 ByteCount;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 66093fa78aed..045227ed4efc 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
+void smb2_query_server_interfaces(struct work_struct *work);
void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels);
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index a3ba3346ed31..0e509a0433fb 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -498,6 +498,7 @@ CIFSSMBNegotiate(const unsigned int xid,
server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf);
server->capabilities = le32_to_cpu(pSMBr->Capabilities);
+ server->session_key_id = pSMBr->SessionKey;
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
@@ -1334,6 +1335,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */
@@ -1713,6 +1715,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
default:
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index f9aef60f1901..7ebed5e856dc 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
return rc;
}
-static void smb2_query_server_interfaces(struct work_struct *work)
+void smb2_query_server_interfaces(struct work_struct *work)
{
int rc;
int xid;
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
+#define set_need_reco(server) \
+do { \
+ spin_lock(&server->srv_lock); \
+ if (server->tcpStatus != CifsExiting) \
+ server->tcpStatus = CifsNeedReconnect; \
+ spin_unlock(&server->srv_lock); \
+} while (0)
+
/*
* Update the tcpStatus for the server.
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
bool all_channels)
{
- struct TCP_Server_Info *pserver;
+ struct TCP_Server_Info *nserver;
struct cifs_ses *ses;
+ LIST_HEAD(reco);
int i;
- /* If server is a channel, select the primary channel */
- pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
-
/* if we need to signal just this channel */
if (!all_channels) {
- spin_lock(&server->srv_lock);
- if (server->tcpStatus != CifsExiting)
- server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&server->srv_lock);
+ set_need_reco(server);
return;
}
- spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
- if (cifs_ses_exiting(ses))
- continue;
- spin_lock(&ses->chan_lock);
- for (i = 0; i < ses->chan_count; i++) {
- if (!ses->chans[i].server)
+ if (SERVER_IS_CHAN(server))
+ server = server->primary_server;
+ scoped_guard(spinlock, &cifs_tcp_ses_lock) {
+ set_need_reco(server);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
continue;
-
- spin_lock(&ses->chans[i].server->srv_lock);
- if (ses->chans[i].server->tcpStatus != CifsExiting)
- ses->chans[i].server->tcpStatus = CifsNeedReconnect;
- spin_unlock(&ses->chans[i].server->srv_lock);
+ }
+ spin_lock(&ses->chan_lock);
+ for (i = 1; i < ses->chan_count; i++) {
+ nserver = ses->chans[i].server;
+ if (!nserver)
+ continue;
+ nserver->srv_count++;
+ list_add(&nserver->rlist, &reco);
+ }
+ spin_unlock(&ses->chan_lock);
+ spin_unlock(&ses->ses_lock);
}
- spin_unlock(&ses->chan_lock);
}
- spin_unlock(&cifs_tcp_ses_lock);
+
+ list_for_each_entry_safe(server, nserver, &reco, rlist) {
+ list_del_init(&server->rlist);
+ set_need_reco(server);
+ cifs_put_tcp_session(server, 0);
+ }
}
/*
@@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server)
/*
* If we're in the process of mounting a share or reconnecting a session
* and the server abruptly shut down (e.g. socket wasn't closed, packet
- * had been ACK'ed but no SMB response), don't wait longer than 20s to
- * negotiate protocol.
+ * had been ACK'ed but no SMB response), don't wait longer than 20s from
+ * when negotiate actually started.
*/
spin_lock(&server->srv_lock);
if (server->tcpStatus == CifsInNegotiate &&
- time_after(jiffies, server->lstrp + 20 * HZ)) {
+ time_after(jiffies, server->neg_start + 20 * HZ)) {
spin_unlock(&server->srv_lock);
cifs_reconnect(server, false);
return true;
@@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
tcon->max_cached_dirs = ctx->max_cached_dirs;
tcon->nodelete = ctx->nodelete;
tcon->local_lease = ctx->local_lease;
- INIT_LIST_HEAD(&tcon->pending_opens);
tcon->status = TID_GOOD;
- INIT_DELAYED_WORK(&tcon->query_interfaces,
- smb2_query_server_interfaces);
if (ses->server->dialect >= SMB30_PROT_ID &&
(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
/* schedule query interfaces poll */
queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
(SMB_INTERFACE_POLL_INTERVAL * HZ));
}
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh);
-#endif
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -4195,6 +4203,7 @@ retry:
server->lstrp = jiffies;
server->tcpStatus = CifsInNegotiate;
+ server->neg_start = jiffies;
spin_unlock(&server->srv_lock);
rc = server->ops->negotiate(xid, ses, server);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 9835672267d2..3819378b1012 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3084,7 +3084,8 @@ void cifs_oplock_break(struct work_struct *work)
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
struct inode *inode = d_inode(cfile->dentry);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
@@ -3094,6 +3095,12 @@ void cifs_oplock_break(struct work_struct *work)
__u64 persistent_fid, volatile_fid;
__u16 net_fid;
+ /*
+ * Hold a reference to the superblock to prevent it and its inodes from
+ * being freed while we are accessing cinode. Otherwise, _cifsFileInfo_put()
+ * may release the last reference to the sb and trigger inode eviction.
+ */
+ cifs_sb_active(sb);
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -3166,6 +3173,7 @@ oplock_break_ack:
cifs_put_tlink(tlink);
out:
cifs_done_oplock_break(cinode);
+ cifs_sb_deactive(sb);
}
static int cifs_swap_activate(struct swap_info_struct *sis,
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a634a34d4086..59ccc2229ab3 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_errorf(fc, "symlinkroot mount options must be absolute path\n");
goto cifs_parse_mount_err;
}
- kfree(ctx->symlinkroot);
- ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->symlinkroot)
+ if (strnlen(param->string, PATH_MAX) == PATH_MAX) {
+ cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n",
+ PATH_MAX - 1);
goto cifs_parse_mount_err;
+ }
+ kfree(ctx->symlinkroot);
+ ctx->symlinkroot = param->string;
+ param->string = NULL;
break;
}
/* case Opt_ignore: - is ignored as expected ... */
@@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
- /*
- * By default resolve all native absolute symlinks relative to "/mnt/".
- * Same default has drvfs driver running in WSL for resolving SMB shares.
- */
- if (!ctx->symlinkroot)
- ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL);
-
return 0;
cifs_parse_mount_err:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 7b6ed9b23e71..da23cc12a52c 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
#ifdef CONFIG_CIFS_DFS_UPCALL
INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
#endif
+ INIT_LIST_HEAD(&ret_buf->pending_opens);
+ INIT_DELAYED_WORK(&ret_buf->query_interfaces,
+ smb2_query_server_interfaces);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh);
+#endif
return ret_buf;
}
@@ -326,6 +332,14 @@ check_smb_hdr(struct smb_hdr *smb)
if (smb->Command == SMB_COM_LOCKING_ANDX)
return 0;
+ /*
+ * Windows NT server returns error resposne (e.g. STATUS_DELETE_PENDING
+ * or STATUS_OBJECT_NAME_NOT_FOUND or ERRDOS/ERRbadfile or any other)
+ * for some TRANS2 requests without the RESPONSE flag set in header.
+ */
+ if (smb->Command == SMB_COM_TRANSACTION2 && smb->Status.CifsError != 0)
+ return 0;
+
cifs_dbg(VFS, "Server sent request, not response. mid=%u\n",
get_mid(smb));
return 1;
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index c3feb26fcfd0..7bf3214117a9 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -263,7 +263,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info,
/* The Mode field in the response can now include the file type as well */
fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode),
fattr->cf_cifsattrs & ATTR_DIRECTORY);
- fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode));
+ fattr->cf_dtype = S_DT(fattr->cf_mode);
switch (fattr->cf_mode & S_IFMT) {
case S_IFLNK:
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 511611206dab..5fa29a97ac15 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
struct reparse_symlink_data_buffer *buf = NULL;
struct cifs_open_info_data data = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ const char *symroot = cifs_sb->ctx->symlinkroot;
struct inode *new;
struct kvec iov;
__le16 *path = NULL;
@@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
.symlink_target = symlink_target,
};
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && symname[0] == '/') {
/*
* This is a request to create an absolute symlink on the server
* which does not support POSIX paths, and expects symlink in
@@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
* ensure compatibility of this symlink stored in absolute form
* on the SMB server.
*/
- if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) {
+ if (!strstarts(symname, symroot)) {
/*
* If the absolute Linux symlink target path is not
* inside "symlinkroot" location then there is no way
@@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode,
cifs_dbg(VFS,
"absolute symlink '%s' cannot be converted to NT format "
"because it is outside of symlinkroot='%s'\n",
- symname, cifs_sb->ctx->symlinkroot);
+ symname, symroot);
rc = -EINVAL;
goto out;
}
- len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[len-1] != '/')
+ len = strlen(symroot);
+ if (symroot[len - 1] != '/')
len++;
if (symname[len] >= 'a' && symname[len] <= 'z' &&
(symname[len+1] == '/' || symname[len+1] == '\0')) {
@@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
const char *full_path,
struct cifs_sb_info *cifs_sb)
{
+ const char *symroot = cifs_sb->ctx->symlinkroot;
char sep = CIFS_DIR_SEP(cifs_sb);
char *linux_target = NULL;
char *smb_target = NULL;
@@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len,
goto out;
}
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) {
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) &&
+ symroot && !relative) {
/*
* This is an absolute symlink from the server which does not
* support POSIX paths, so the symlink is in NT-style path.
@@ -875,15 +879,8 @@ globalroot:
abs_path += sizeof("\\DosDevices\\")-1;
else if (strstarts(abs_path, "\\GLOBAL??\\"))
abs_path += sizeof("\\GLOBAL??\\")-1;
- else {
- /* Unhandled absolute symlink, points outside of DOS/Win32 */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
- }
+ else
+ goto out_unhandled_target;
/* Sometimes path separator after \?? is double backslash */
if (abs_path[0] == '\\')
@@ -910,25 +907,19 @@ globalroot:
abs_path++;
abs_path[0] = drive_letter;
} else {
- /* Unhandled absolute symlink. Report an error. */
- cifs_dbg(VFS,
- "absolute symlink '%s' cannot be converted from NT format "
- "because points to unknown target\n",
- smb_target);
- rc = -EIO;
- goto out;
+ goto out_unhandled_target;
}
abs_path_len = strlen(abs_path)+1;
- symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot);
- if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/')
+ symlinkroot_len = strlen(symroot);
+ if (symroot[symlinkroot_len - 1] == '/')
symlinkroot_len--;
linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL);
if (!linux_target) {
rc = -ENOMEM;
goto out;
}
- memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len);
+ memcpy(linux_target, symroot, symlinkroot_len);
linux_target[symlinkroot_len] = '/';
memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len);
} else if (smb_target[0] == sep && relative) {
@@ -966,6 +957,7 @@ globalroot:
* These paths have same format as Linux symlinks, so no
* conversion is needed.
*/
+out_unhandled_target:
linux_target = smb_target;
smb_target = NULL;
}
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 12c99fb4023d..330bc3d25bad 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -631,6 +631,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(server->maxReq);
pSMB->req.VcNumber = cpu_to_le16(1);
+ pSMB->req.SessionKey = server->session_key_id;
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@ -1687,22 +1688,22 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
capabilities = cifs_ssetup_hdr(ses, server, pSMB);
- if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
- cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
- return -ENOSYS;
- }
-
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
capabilities |= CAP_EXTENDED_SECURITY;
pSMB->req.Capabilities |= cpu_to_le32(capabilities);
bcc_ptr = sess_data->iov[2].iov_base;
- /* unicode strings must be word aligned */
- if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
- *bcc_ptr = 0;
- bcc_ptr++;
+
+ if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) {
+ /* unicode strings must be word aligned */
+ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
+ }
+ unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+ } else {
+ ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp);
}
- unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
sess_data->iov[2].iov_len = (long) bcc_ptr -
(long) sess_data->iov[2].iov_base;
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 2a3e46b8e15a..a11a2a693c51 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1346,7 +1346,8 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data,
* empty object on the server.
*/
if (!(le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS))
- return ERR_PTR(-EOPNOTSUPP);
+ if (!tcon->posix_extensions)
+ return ERR_PTR(-EOPNOTSUPP);
oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
SYNCHRONIZE | DELETE |
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 2fe8eeb98535..fcbc4048f106 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -504,6 +504,9 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
wsize = min_t(unsigned int, wsize, server->max_write);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma) {
+ struct smbdirect_socket_parameters *sp =
+ &server->smbd_conn->socket.parameters;
+
if (server->sign)
/*
* Account for SMB2 data transfer packet header and
@@ -511,12 +514,12 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
*/
wsize = min_t(unsigned int,
wsize,
- server->smbd_conn->max_fragmented_send_size -
+ sp->max_fragmented_send_size -
SMB2_READWRITE_PDU_HEADER_SIZE -
sizeof(struct smb2_transform_hdr));
else
wsize = min_t(unsigned int,
- wsize, server->smbd_conn->max_readwrite_size);
+ wsize, sp->max_read_write_size);
}
#endif
if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
@@ -552,6 +555,9 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
rsize = min_t(unsigned int, rsize, server->max_read);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->rdma) {
+ struct smbdirect_socket_parameters *sp =
+ &server->smbd_conn->socket.parameters;
+
if (server->sign)
/*
* Account for SMB2 data transfer packet header and
@@ -559,12 +565,12 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
*/
rsize = min_t(unsigned int,
rsize,
- server->smbd_conn->max_fragmented_recv_size -
+ sp->max_fragmented_recv_size -
SMB2_READWRITE_PDU_HEADER_SIZE -
sizeof(struct smb2_transform_hdr));
else
rsize = min_t(unsigned int,
- rsize, server->smbd_conn->max_readwrite_size);
+ rsize, sp->max_read_write_size);
}
#endif
@@ -4307,6 +4313,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
u8 key[SMB3_ENC_DEC_KEY_SIZE];
struct aead_request *req;
u8 *iv;
+ DECLARE_CRYPTO_WAIT(wait);
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
size_t sensitive_size;
@@ -4357,7 +4364,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
aead_request_set_crypt(req, sg, sg, crypt_len, iv);
aead_request_set_ad(req, assoc_data_len);
- rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
+ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &wait);
+
+ rc = crypto_wait_req(enc ? crypto_aead_encrypt(req)
+ : crypto_aead_decrypt(req), &wait);
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
@@ -5246,7 +5257,8 @@ static int smb2_make_node(unsigned int xid, struct inode *inode,
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
rc = cifs_sfu_make_node(xid, inode, dentry, tcon,
full_path, mode, dev);
- } else if (le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS) {
+ } else if ((le32_to_cpu(tcon->fsAttrInfo.Attributes) & FILE_SUPPORTS_REPARSE_POINTS)
+ || (tcon->posix_extensions)) {
rc = smb2_mknod_reparse(xid, inode, dentry, tcon,
full_path, mode, dev);
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 72903265b170..2c0cc544dfb3 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -423,9 +423,9 @@ skip_sess_setup:
free_xid(xid);
ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES;
- /* regardless of rc value, setup polling */
- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
- (SMB_INTERFACE_POLL_INTERVAL * HZ));
+ if (!tcon->ipc && !tcon->dummy)
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
mutex_unlock(&ses->session_mutex);
@@ -4221,10 +4221,8 @@ void smb2_reconnect_server(struct work_struct *work)
}
goto done;
}
-
tcon->status = TID_GOOD;
- tcon->retry = false;
- tcon->need_reconnect = false;
+ tcon->dummy = true;
/* now reconnect sessions for necessary channels */
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
@@ -4854,6 +4852,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
+ __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 9d8be034f103..754e94a0e07f 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/folio_queue.h>
+#include "../common/smbdirect/smbdirect_pdu.h"
#include "smbdirect.h"
#include "cifs_debug.h"
#include "cifsproto.h"
@@ -50,9 +51,6 @@ struct smb_extract_to_rdma {
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
struct smb_extract_to_rdma *rdma);
-/* SMBD version number */
-#define SMBD_V1 0x0100
-
/* Port numbers for SMBD transport */
#define SMB_PORT 445
#define SMBD_PORT 5445
@@ -165,10 +163,11 @@ static void smbd_disconnect_rdma_work(struct work_struct *work)
{
struct smbd_connection *info =
container_of(work, struct smbd_connection, disconnect_work);
+ struct smbdirect_socket *sc = &info->socket;
- if (info->transport_status == SMBD_CONNECTED) {
- info->transport_status = SMBD_DISCONNECTING;
- rdma_disconnect(info->id);
+ if (sc->status == SMBDIRECT_SOCKET_CONNECTED) {
+ sc->status = SMBDIRECT_SOCKET_DISCONNECTING;
+ rdma_disconnect(sc->rdma.cm_id);
}
}
@@ -182,6 +181,7 @@ static int smbd_conn_upcall(
struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct smbd_connection *info = id->context;
+ struct smbdirect_socket *sc = &info->socket;
log_rdma_event(INFO, "event=%d status=%d\n",
event->event, event->status);
@@ -205,7 +205,7 @@ static int smbd_conn_upcall(
case RDMA_CM_EVENT_ESTABLISHED:
log_rdma_event(INFO, "connected event=%d\n", event->event);
- info->transport_status = SMBD_CONNECTED;
+ sc->status = SMBDIRECT_SOCKET_CONNECTED;
wake_up_interruptible(&info->conn_wait);
break;
@@ -213,20 +213,20 @@ static int smbd_conn_upcall(
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
- info->transport_status = SMBD_DISCONNECTED;
+ sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
wake_up_interruptible(&info->conn_wait);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED:
/* This happens when we fail the negotiation */
- if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
- info->transport_status = SMBD_DISCONNECTED;
+ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) {
+ sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
wake_up(&info->conn_wait);
break;
}
- info->transport_status = SMBD_DISCONNECTED;
+ sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
wake_up_interruptible(&info->disconn_wait);
wake_up_interruptible(&info->wait_reassembly_queue);
wake_up_interruptible_all(&info->wait_send_queue);
@@ -275,6 +275,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
int i;
struct smbd_request *request =
container_of(wc->wr_cqe, struct smbd_request, cqe);
+ struct smbd_connection *info = request->info;
+ struct smbdirect_socket *sc = &info->socket;
log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n",
request, wc->status);
@@ -286,7 +288,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
}
for (i = 0; i < request->num_sge; i++)
- ib_dma_unmap_single(request->info->id->device,
+ ib_dma_unmap_single(sc->ib.dev,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
@@ -299,7 +301,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
mempool_free(request, request->info->request_mempool);
}
-static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
+static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp)
{
log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n",
resp->min_version, resp->max_version,
@@ -318,15 +320,17 @@ static bool process_negotiation_response(
struct smbd_response *response, int packet_length)
{
struct smbd_connection *info = response->info;
- struct smbd_negotiate_resp *packet = smbd_response_payload(response);
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
+ struct smbdirect_negotiate_resp *packet = smbd_response_payload(response);
- if (packet_length < sizeof(struct smbd_negotiate_resp)) {
+ if (packet_length < sizeof(struct smbdirect_negotiate_resp)) {
log_rdma_event(ERR,
"error: packet_length=%d\n", packet_length);
return false;
}
- if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
+ if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) {
log_rdma_event(ERR, "error: negotiated_version=%x\n",
le16_to_cpu(packet->negotiated_version));
return false;
@@ -347,20 +351,20 @@ static bool process_negotiation_response(
atomic_set(&info->receive_credits, 0);
- if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
+ if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) {
log_rdma_event(ERR, "error: preferred_send_size=%d\n",
le32_to_cpu(packet->preferred_send_size));
return false;
}
- info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
+ sp->max_recv_size = le32_to_cpu(packet->preferred_send_size);
if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
log_rdma_event(ERR, "error: max_receive_size=%d\n",
le32_to_cpu(packet->max_receive_size));
return false;
}
- info->max_send_size = min_t(int, info->max_send_size,
- le32_to_cpu(packet->max_receive_size));
+ sp->max_send_size = min_t(u32, sp->max_send_size,
+ le32_to_cpu(packet->max_receive_size));
if (le32_to_cpu(packet->max_fragmented_size) <
SMBD_MIN_FRAGMENTED_SIZE) {
@@ -368,18 +372,18 @@ static bool process_negotiation_response(
le32_to_cpu(packet->max_fragmented_size));
return false;
}
- info->max_fragmented_send_size =
+ sp->max_fragmented_send_size =
le32_to_cpu(packet->max_fragmented_size);
info->rdma_readwrite_threshold =
- rdma_readwrite_threshold > info->max_fragmented_send_size ?
- info->max_fragmented_send_size :
+ rdma_readwrite_threshold > sp->max_fragmented_send_size ?
+ sp->max_fragmented_send_size :
rdma_readwrite_threshold;
- info->max_readwrite_size = min_t(u32,
+ sp->max_read_write_size = min_t(u32,
le32_to_cpu(packet->max_readwrite_size),
info->max_frmr_depth * PAGE_SIZE);
- info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
+ info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE;
return true;
}
@@ -393,8 +397,9 @@ static void smbd_post_send_credits(struct work_struct *work)
struct smbd_connection *info =
container_of(work, struct smbd_connection,
post_send_credits_work);
+ struct smbdirect_socket *sc = &info->socket;
- if (info->transport_status != SMBD_CONNECTED) {
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
wake_up(&info->wait_receive_queues);
return;
}
@@ -448,7 +453,7 @@ static void smbd_post_send_credits(struct work_struct *work)
/* Called from softirq, when recv is done */
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct smbd_data_transfer *data_transfer;
+ struct smbdirect_data_transfer *data_transfer;
struct smbd_response *response =
container_of(wc->wr_cqe, struct smbd_response, cqe);
struct smbd_connection *info = response->info;
@@ -474,7 +479,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
switch (response->type) {
/* SMBD negotiation response */
case SMBD_NEGOTIATE_RESP:
- dump_smbd_negotiate_resp(smbd_response_payload(response));
+ dump_smbdirect_negotiate_resp(smbd_response_payload(response));
info->full_packet_received = true;
info->negotiate_done =
process_negotiation_response(response, wc->byte_len);
@@ -531,7 +536,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
/* Send a KEEP_ALIVE response right away if requested */
info->keep_alive_requested = KEEP_ALIVE_NONE;
if (le16_to_cpu(data_transfer->flags) &
- SMB_DIRECT_RESPONSE_REQUESTED) {
+ SMBDIRECT_FLAG_RESPONSE_REQUESTED) {
info->keep_alive_requested = KEEP_ALIVE_PENDING;
}
@@ -635,32 +640,34 @@ static int smbd_ia_open(
struct smbd_connection *info,
struct sockaddr *dstaddr, int port)
{
+ struct smbdirect_socket *sc = &info->socket;
int rc;
- info->id = smbd_create_id(info, dstaddr, port);
- if (IS_ERR(info->id)) {
- rc = PTR_ERR(info->id);
+ sc->rdma.cm_id = smbd_create_id(info, dstaddr, port);
+ if (IS_ERR(sc->rdma.cm_id)) {
+ rc = PTR_ERR(sc->rdma.cm_id);
goto out1;
}
+ sc->ib.dev = sc->rdma.cm_id->device;
- if (!frwr_is_supported(&info->id->device->attrs)) {
+ if (!frwr_is_supported(&sc->ib.dev->attrs)) {
log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n");
log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
- info->id->device->attrs.device_cap_flags,
- info->id->device->attrs.max_fast_reg_page_list_len);
+ sc->ib.dev->attrs.device_cap_flags,
+ sc->ib.dev->attrs.max_fast_reg_page_list_len);
rc = -EPROTONOSUPPORT;
goto out2;
}
info->max_frmr_depth = min_t(int,
smbd_max_frmr_depth,
- info->id->device->attrs.max_fast_reg_page_list_len);
+ sc->ib.dev->attrs.max_fast_reg_page_list_len);
info->mr_type = IB_MR_TYPE_MEM_REG;
- if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
+ if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
info->mr_type = IB_MR_TYPE_SG_GAPS;
- info->pd = ib_alloc_pd(info->id->device, 0);
- if (IS_ERR(info->pd)) {
- rc = PTR_ERR(info->pd);
+ sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
+ if (IS_ERR(sc->ib.pd)) {
+ rc = PTR_ERR(sc->ib.pd);
log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
goto out2;
}
@@ -668,8 +675,8 @@ static int smbd_ia_open(
return 0;
out2:
- rdma_destroy_id(info->id);
- info->id = NULL;
+ rdma_destroy_id(sc->rdma.cm_id);
+ sc->rdma.cm_id = NULL;
out1:
return rc;
@@ -683,10 +690,12 @@ out1:
*/
static int smbd_post_send_negotiate_req(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
struct ib_send_wr send_wr;
int rc = -ENOMEM;
struct smbd_request *request;
- struct smbd_negotiate_req *packet;
+ struct smbdirect_negotiate_req *packet;
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
if (!request)
@@ -695,29 +704,29 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
request->info = info;
packet = smbd_request_payload(request);
- packet->min_version = cpu_to_le16(SMBD_V1);
- packet->max_version = cpu_to_le16(SMBD_V1);
+ packet->min_version = cpu_to_le16(SMBDIRECT_V1);
+ packet->max_version = cpu_to_le16(SMBDIRECT_V1);
packet->reserved = 0;
- packet->credits_requested = cpu_to_le16(info->send_credit_target);
- packet->preferred_send_size = cpu_to_le32(info->max_send_size);
- packet->max_receive_size = cpu_to_le32(info->max_receive_size);
+ packet->credits_requested = cpu_to_le16(sp->send_credit_target);
+ packet->preferred_send_size = cpu_to_le32(sp->max_send_size);
+ packet->max_receive_size = cpu_to_le32(sp->max_recv_size);
packet->max_fragmented_size =
- cpu_to_le32(info->max_fragmented_recv_size);
+ cpu_to_le32(sp->max_fragmented_recv_size);
request->num_sge = 1;
request->sge[0].addr = ib_dma_map_single(
- info->id->device, (void *)packet,
+ sc->ib.dev, (void *)packet,
sizeof(*packet), DMA_TO_DEVICE);
- if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+ if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) {
rc = -EIO;
goto dma_mapping_failed;
}
request->sge[0].length = sizeof(*packet);
- request->sge[0].lkey = info->pd->local_dma_lkey;
+ request->sge[0].lkey = sc->ib.pd->local_dma_lkey;
ib_dma_sync_single_for_device(
- info->id->device, request->sge[0].addr,
+ sc->ib.dev, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);
request->cqe.done = send_done;
@@ -734,14 +743,14 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
request->sge[0].length, request->sge[0].lkey);
atomic_inc(&info->send_pending);
- rc = ib_post_send(info->id->qp, &send_wr, NULL);
+ rc = ib_post_send(sc->ib.qp, &send_wr, NULL);
if (!rc)
return 0;
/* if we reach here, post send failed */
log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
atomic_dec(&info->send_pending);
- ib_dma_unmap_single(info->id->device, request->sge[0].addr,
+ ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr,
request->sge[0].length, DMA_TO_DEVICE);
smbd_disconnect_rdma_connection(info);
@@ -774,10 +783,10 @@ static int manage_credits_prior_sending(struct smbd_connection *info)
/*
* Check if we need to send a KEEP_ALIVE message
* The idle connection timer triggers a KEEP_ALIVE message when expires
- * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send
+ * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send
* back a response.
* return value:
- * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
+ * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set
* 0: otherwise
*/
static int manage_keep_alive_before_sending(struct smbd_connection *info)
@@ -793,6 +802,8 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info)
static int smbd_post_send(struct smbd_connection *info,
struct smbd_request *request)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
struct ib_send_wr send_wr;
int rc, i;
@@ -801,7 +812,7 @@ static int smbd_post_send(struct smbd_connection *info,
"rdma_request sge[%d] addr=0x%llx length=%u\n",
i, request->sge[i].addr, request->sge[i].length);
ib_dma_sync_single_for_device(
- info->id->device,
+ sc->ib.dev,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
@@ -816,7 +827,7 @@ static int smbd_post_send(struct smbd_connection *info,
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- rc = ib_post_send(info->id->qp, &send_wr, NULL);
+ rc = ib_post_send(sc->ib.qp, &send_wr, NULL);
if (rc) {
log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
smbd_disconnect_rdma_connection(info);
@@ -824,7 +835,7 @@ static int smbd_post_send(struct smbd_connection *info,
} else
/* Reset timer for idle connection after packet is sent */
mod_delayed_work(info->workqueue, &info->idle_timer_work,
- info->keep_alive_interval*HZ);
+ msecs_to_jiffies(sp->keepalive_interval_msec));
return rc;
}
@@ -833,22 +844,24 @@ static int smbd_post_send_iter(struct smbd_connection *info,
struct iov_iter *iter,
int *_remaining_data_length)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
int i, rc;
int header_length;
int data_length;
struct smbd_request *request;
- struct smbd_data_transfer *packet;
+ struct smbdirect_data_transfer *packet;
int new_credits = 0;
wait_credit:
/* Wait for send credits. A SMBD packet needs one credit */
rc = wait_event_interruptible(info->wait_send_queue,
atomic_read(&info->send_credits) > 0 ||
- info->transport_status != SMBD_CONNECTED);
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
if (rc)
goto err_wait_credit;
- if (info->transport_status != SMBD_CONNECTED) {
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
log_outgoing(ERR, "disconnected not sending on wait_credit\n");
rc = -EAGAIN;
goto err_wait_credit;
@@ -860,17 +873,17 @@ wait_credit:
wait_send_queue:
wait_event(info->wait_post_send,
- atomic_read(&info->send_pending) < info->send_credit_target ||
- info->transport_status != SMBD_CONNECTED);
+ atomic_read(&info->send_pending) < sp->send_credit_target ||
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
- if (info->transport_status != SMBD_CONNECTED) {
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
rc = -EAGAIN;
goto err_wait_send_queue;
}
if (unlikely(atomic_inc_return(&info->send_pending) >
- info->send_credit_target)) {
+ sp->send_credit_target)) {
atomic_dec(&info->send_pending);
goto wait_send_queue;
}
@@ -890,12 +903,14 @@ wait_send_queue:
.nr_sge = 1,
.max_sge = SMBDIRECT_MAX_SEND_SGE,
.sge = request->sge,
- .device = info->id->device,
- .local_dma_lkey = info->pd->local_dma_lkey,
+ .device = sc->ib.dev,
+ .local_dma_lkey = sc->ib.pd->local_dma_lkey,
.direction = DMA_TO_DEVICE,
};
+ size_t payload_len = umin(*_remaining_data_length,
+ sp->max_send_size - sizeof(*packet));
- rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
+ rc = smb_extract_iter_to_rdma(iter, payload_len,
&extract);
if (rc < 0)
goto err_dma;
@@ -909,7 +924,7 @@ wait_send_queue:
/* Fill in the packet header */
packet = smbd_request_payload(request);
- packet->credits_requested = cpu_to_le16(info->send_credit_target);
+ packet->credits_requested = cpu_to_le16(sp->send_credit_target);
new_credits = manage_credits_prior_sending(info);
atomic_add(new_credits, &info->receive_credits);
@@ -919,7 +934,7 @@ wait_send_queue:
packet->flags = 0;
if (manage_keep_alive_before_sending(info))
- packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
+ packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED);
packet->reserved = 0;
if (!data_length)
@@ -938,23 +953,23 @@ wait_send_queue:
le32_to_cpu(packet->remaining_data_length));
/* Map the packet to DMA */
- header_length = sizeof(struct smbd_data_transfer);
+ header_length = sizeof(struct smbdirect_data_transfer);
/* If this is a packet without payload, don't send padding */
if (!data_length)
- header_length = offsetof(struct smbd_data_transfer, padding);
+ header_length = offsetof(struct smbdirect_data_transfer, padding);
- request->sge[0].addr = ib_dma_map_single(info->id->device,
+ request->sge[0].addr = ib_dma_map_single(sc->ib.dev,
(void *)packet,
header_length,
DMA_TO_DEVICE);
- if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+ if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) {
rc = -EIO;
request->sge[0].addr = 0;
goto err_dma;
}
request->sge[0].length = header_length;
- request->sge[0].lkey = info->pd->local_dma_lkey;
+ request->sge[0].lkey = sc->ib.pd->local_dma_lkey;
rc = smbd_post_send(info, request);
if (!rc)
@@ -963,7 +978,7 @@ wait_send_queue:
err_dma:
for (i = 0; i < request->num_sge; i++)
if (request->sge[i].addr)
- ib_dma_unmap_single(info->id->device,
+ ib_dma_unmap_single(sc->ib.dev,
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
@@ -1000,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info)
return smbd_post_send_iter(info, NULL, &remaining_data_length);
}
+static int smbd_post_send_full_iter(struct smbd_connection *info,
+ struct iov_iter *iter,
+ int *_remaining_data_length)
+{
+ int rc = 0;
+
+ /*
+ * smbd_post_send_iter() respects the
+ * negotiated max_send_size, so we need to
+ * loop until the full iter is posted
+ */
+
+ while (iov_iter_count(iter) > 0) {
+ rc = smbd_post_send_iter(info, iter, _remaining_data_length);
+ if (rc < 0)
+ break;
+ }
+
+ return rc;
+}
+
/*
* Post a receive request to the transport
* The remote peer can only send data when a receive request is posted
@@ -1008,17 +1044,19 @@ static int smbd_post_send_empty(struct smbd_connection *info)
static int smbd_post_recv(
struct smbd_connection *info, struct smbd_response *response)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
struct ib_recv_wr recv_wr;
int rc = -EIO;
response->sge.addr = ib_dma_map_single(
- info->id->device, response->packet,
- info->max_receive_size, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(info->id->device, response->sge.addr))
+ sc->ib.dev, response->packet,
+ sp->max_recv_size, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr))
return rc;
- response->sge.length = info->max_receive_size;
- response->sge.lkey = info->pd->local_dma_lkey;
+ response->sge.length = sp->max_recv_size;
+ response->sge.lkey = sc->ib.pd->local_dma_lkey;
response->cqe.done = recv_done;
@@ -1027,9 +1065,9 @@ static int smbd_post_recv(
recv_wr.sg_list = &response->sge;
recv_wr.num_sge = 1;
- rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
+ rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL);
if (rc) {
- ib_dma_unmap_single(info->id->device, response->sge.addr,
+ ib_dma_unmap_single(sc->ib.dev, response->sge.addr,
response->sge.length, DMA_FROM_DEVICE);
smbd_disconnect_rdma_connection(info);
log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
@@ -1187,9 +1225,10 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info)
static void put_receive_buffer(
struct smbd_connection *info, struct smbd_response *response)
{
+ struct smbdirect_socket *sc = &info->socket;
unsigned long flags;
- ib_dma_unmap_single(info->id->device, response->sge.addr,
+ ib_dma_unmap_single(sc->ib.dev, response->sge.addr,
response->sge.length, DMA_FROM_DEVICE);
spin_lock_irqsave(&info->receive_queue_lock, flags);
@@ -1264,6 +1303,8 @@ static void idle_connection_timer(struct work_struct *work)
struct smbd_connection *info = container_of(
work, struct smbd_connection,
idle_timer_work.work);
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
log_keep_alive(ERR,
@@ -1278,7 +1319,7 @@ static void idle_connection_timer(struct work_struct *work)
/* Setup the next idle timeout work */
queue_delayed_work(info->workqueue, &info->idle_timer_work,
- info->keep_alive_interval*HZ);
+ msecs_to_jiffies(sp->keepalive_interval_msec));
}
/*
@@ -1289,6 +1330,8 @@ static void idle_connection_timer(struct work_struct *work)
void smbd_destroy(struct TCP_Server_Info *server)
{
struct smbd_connection *info = server->smbd_conn;
+ struct smbdirect_socket *sc;
+ struct smbdirect_socket_parameters *sp;
struct smbd_response *response;
unsigned long flags;
@@ -1296,19 +1339,22 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "rdma session already destroyed\n");
return;
}
+ sc = &info->socket;
+ sp = &sc->parameters;
log_rdma_event(INFO, "destroying rdma session\n");
- if (info->transport_status != SMBD_DISCONNECTED) {
- rdma_disconnect(server->smbd_conn->id);
+ if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) {
+ rdma_disconnect(sc->rdma.cm_id);
log_rdma_event(INFO, "wait for transport being disconnected\n");
wait_event_interruptible(
info->disconn_wait,
- info->transport_status == SMBD_DISCONNECTED);
+ sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
}
log_rdma_event(INFO, "destroying qp\n");
- ib_drain_qp(info->id->qp);
- rdma_destroy_qp(info->id);
+ ib_drain_qp(sc->ib.qp);
+ rdma_destroy_qp(sc->rdma.cm_id);
+ sc->ib.qp = NULL;
log_rdma_event(INFO, "cancelling idle timer\n");
cancel_delayed_work_sync(&info->idle_timer_work);
@@ -1336,7 +1382,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "free receive buffers\n");
wait_event(info->wait_receive_queues,
info->count_receive_queue + info->count_empty_packet_queue
- == info->receive_credit_max);
+ == sp->recv_credit_max);
destroy_receive_buffers(info);
/*
@@ -1355,10 +1401,10 @@ void smbd_destroy(struct TCP_Server_Info *server)
}
destroy_mr_list(info);
- ib_free_cq(info->send_cq);
- ib_free_cq(info->recv_cq);
- ib_dealloc_pd(info->pd);
- rdma_destroy_id(info->id);
+ ib_free_cq(sc->ib.send_cq);
+ ib_free_cq(sc->ib.recv_cq);
+ ib_dealloc_pd(sc->ib.pd);
+ rdma_destroy_id(sc->rdma.cm_id);
/* free mempools */
mempool_destroy(info->request_mempool);
@@ -1367,7 +1413,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
mempool_destroy(info->response_mempool);
kmem_cache_destroy(info->response_cache);
- info->transport_status = SMBD_DESTROYED;
+ sc->status = SMBDIRECT_SOCKET_DESTROYED;
destroy_workqueue(info->workqueue);
log_rdma_event(INFO, "rdma session destroyed\n");
@@ -1392,7 +1438,7 @@ int smbd_reconnect(struct TCP_Server_Info *server)
* This is possible if transport is disconnected and we haven't received
* notification from RDMA, but upper layer has detected timeout
*/
- if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
+ if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) {
log_rdma_event(INFO, "disconnecting transport\n");
smbd_destroy(server);
}
@@ -1424,37 +1470,47 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info)
#define MAX_NAME_LEN 80
static int allocate_caches_and_workqueue(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
char name[MAX_NAME_LEN];
int rc;
+ if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer)))
+ return -ENOMEM;
+
scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
info->request_cache =
kmem_cache_create(
name,
sizeof(struct smbd_request) +
- sizeof(struct smbd_data_transfer),
+ sizeof(struct smbdirect_data_transfer),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!info->request_cache)
return -ENOMEM;
info->request_mempool =
- mempool_create(info->send_credit_target, mempool_alloc_slab,
+ mempool_create(sp->send_credit_target, mempool_alloc_slab,
mempool_free_slab, info->request_cache);
if (!info->request_mempool)
goto out1;
scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+
+ struct kmem_cache_args response_args = {
+ .align = __alignof__(struct smbd_response),
+ .useroffset = (offsetof(struct smbd_response, packet) +
+ sizeof(struct smbdirect_data_transfer)),
+ .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer),
+ };
info->response_cache =
- kmem_cache_create(
- name,
- sizeof(struct smbd_response) +
- info->max_receive_size,
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ kmem_cache_create(name,
+ sizeof(struct smbd_response) + sp->max_recv_size,
+ &response_args, SLAB_HWCACHE_ALIGN);
if (!info->response_cache)
goto out2;
info->response_mempool =
- mempool_create(info->receive_credit_max, mempool_alloc_slab,
+ mempool_create(sp->recv_credit_max, mempool_alloc_slab,
mempool_free_slab, info->response_cache);
if (!info->response_mempool)
goto out3;
@@ -1464,7 +1520,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info)
if (!info->workqueue)
goto out4;
- rc = allocate_receive_buffers(info, info->receive_credit_max);
+ rc = allocate_receive_buffers(info, sp->recv_credit_max);
if (rc) {
log_rdma_event(ERR, "failed to allocate receive buffers\n");
goto out5;
@@ -1491,6 +1547,8 @@ static struct smbd_connection *_smbd_get_connection(
{
int rc;
struct smbd_connection *info;
+ struct smbdirect_socket *sc;
+ struct smbdirect_socket_parameters *sp;
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
@@ -1500,101 +1558,102 @@ static struct smbd_connection *_smbd_get_connection(
info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
if (!info)
return NULL;
+ sc = &info->socket;
+ sp = &sc->parameters;
- info->transport_status = SMBD_CONNECTING;
+ sc->status = SMBDIRECT_SOCKET_CONNECTING;
rc = smbd_ia_open(info, dstaddr, port);
if (rc) {
log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
goto create_id_failed;
}
- if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
- smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
+ if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe ||
+ smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) {
log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
smbd_send_credit_target,
- info->id->device->attrs.max_cqe,
- info->id->device->attrs.max_qp_wr);
+ sc->ib.dev->attrs.max_cqe,
+ sc->ib.dev->attrs.max_qp_wr);
goto config_failed;
}
- if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
- smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
+ if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe ||
+ smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) {
log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
smbd_receive_credit_max,
- info->id->device->attrs.max_cqe,
- info->id->device->attrs.max_qp_wr);
+ sc->ib.dev->attrs.max_cqe,
+ sc->ib.dev->attrs.max_qp_wr);
goto config_failed;
}
- info->receive_credit_max = smbd_receive_credit_max;
- info->send_credit_target = smbd_send_credit_target;
- info->max_send_size = smbd_max_send_size;
- info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
- info->max_receive_size = smbd_max_receive_size;
- info->keep_alive_interval = smbd_keep_alive_interval;
+ sp->recv_credit_max = smbd_receive_credit_max;
+ sp->send_credit_target = smbd_send_credit_target;
+ sp->max_send_size = smbd_max_send_size;
+ sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
+ sp->max_recv_size = smbd_max_receive_size;
+ sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000;
- if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
- info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
+ if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
+ sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
log_rdma_event(ERR,
"device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
IB_DEVICE_NAME_MAX,
- info->id->device->name,
- info->id->device->attrs.max_send_sge,
- info->id->device->attrs.max_recv_sge);
+ sc->ib.dev->name,
+ sc->ib.dev->attrs.max_send_sge,
+ sc->ib.dev->attrs.max_recv_sge);
goto config_failed;
}
- info->send_cq = NULL;
- info->recv_cq = NULL;
- info->send_cq =
- ib_alloc_cq_any(info->id->device, info,
- info->send_credit_target, IB_POLL_SOFTIRQ);
- if (IS_ERR(info->send_cq)) {
- info->send_cq = NULL;
+ sc->ib.send_cq =
+ ib_alloc_cq_any(sc->ib.dev, info,
+ sp->send_credit_target, IB_POLL_SOFTIRQ);
+ if (IS_ERR(sc->ib.send_cq)) {
+ sc->ib.send_cq = NULL;
goto alloc_cq_failed;
}
- info->recv_cq =
- ib_alloc_cq_any(info->id->device, info,
- info->receive_credit_max, IB_POLL_SOFTIRQ);
- if (IS_ERR(info->recv_cq)) {
- info->recv_cq = NULL;
+ sc->ib.recv_cq =
+ ib_alloc_cq_any(sc->ib.dev, info,
+ sp->recv_credit_max, IB_POLL_SOFTIRQ);
+ if (IS_ERR(sc->ib.recv_cq)) {
+ sc->ib.recv_cq = NULL;
goto alloc_cq_failed;
}
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.event_handler = smbd_qp_async_error_upcall;
qp_attr.qp_context = info;
- qp_attr.cap.max_send_wr = info->send_credit_target;
- qp_attr.cap.max_recv_wr = info->receive_credit_max;
+ qp_attr.cap.max_send_wr = sp->send_credit_target;
+ qp_attr.cap.max_recv_wr = sp->recv_credit_max;
qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
qp_attr.cap.max_inline_data = 0;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
- qp_attr.send_cq = info->send_cq;
- qp_attr.recv_cq = info->recv_cq;
+ qp_attr.send_cq = sc->ib.send_cq;
+ qp_attr.recv_cq = sc->ib.recv_cq;
qp_attr.port_num = ~0;
- rc = rdma_create_qp(info->id, info->pd, &qp_attr);
+ rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr);
if (rc) {
log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
goto create_qp_failed;
}
+ sc->ib.qp = sc->rdma.cm_id->qp;
memset(&conn_param, 0, sizeof(conn_param));
conn_param.initiator_depth = 0;
conn_param.responder_resources =
- min(info->id->device->attrs.max_qp_rd_atom,
+ min(sc->ib.dev->attrs.max_qp_rd_atom,
SMBD_CM_RESPONDER_RESOURCES);
info->responder_resources = conn_param.responder_resources;
log_rdma_mr(INFO, "responder_resources=%d\n",
info->responder_resources);
/* Need to send IRD/ORD in private data for iWARP */
- info->id->device->ops.get_port_immutable(
- info->id->device, info->id->port_num, &port_immutable);
+ sc->ib.dev->ops.get_port_immutable(
+ sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable);
if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
ird_ord_hdr[0] = info->responder_resources;
ird_ord_hdr[1] = 1;
@@ -1615,16 +1674,16 @@ static struct smbd_connection *_smbd_get_connection(
init_waitqueue_head(&info->conn_wait);
init_waitqueue_head(&info->disconn_wait);
init_waitqueue_head(&info->wait_reassembly_queue);
- rc = rdma_connect(info->id, &conn_param);
+ rc = rdma_connect(sc->rdma.cm_id, &conn_param);
if (rc) {
log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
goto rdma_connect_failed;
}
wait_event_interruptible(
- info->conn_wait, info->transport_status != SMBD_CONNECTING);
+ info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING);
- if (info->transport_status != SMBD_CONNECTED) {
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
goto rdma_connect_failed;
}
@@ -1640,7 +1699,7 @@ static struct smbd_connection *_smbd_get_connection(
init_waitqueue_head(&info->wait_send_queue);
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
queue_delayed_work(info->workqueue, &info->idle_timer_work,
- info->keep_alive_interval*HZ);
+ msecs_to_jiffies(sp->keepalive_interval_msec));
init_waitqueue_head(&info->wait_send_pending);
atomic_set(&info->send_pending, 0);
@@ -1675,26 +1734,26 @@ allocate_mr_failed:
negotiation_failed:
cancel_delayed_work_sync(&info->idle_timer_work);
destroy_caches_and_workqueue(info);
- info->transport_status = SMBD_NEGOTIATE_FAILED;
+ sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
init_waitqueue_head(&info->conn_wait);
- rdma_disconnect(info->id);
+ rdma_disconnect(sc->rdma.cm_id);
wait_event(info->conn_wait,
- info->transport_status == SMBD_DISCONNECTED);
+ sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
allocate_cache_failed:
rdma_connect_failed:
- rdma_destroy_qp(info->id);
+ rdma_destroy_qp(sc->rdma.cm_id);
create_qp_failed:
alloc_cq_failed:
- if (info->send_cq)
- ib_free_cq(info->send_cq);
- if (info->recv_cq)
- ib_free_cq(info->recv_cq);
+ if (sc->ib.send_cq)
+ ib_free_cq(sc->ib.send_cq);
+ if (sc->ib.recv_cq)
+ ib_free_cq(sc->ib.recv_cq);
config_failed:
- ib_dealloc_pd(info->pd);
- rdma_destroy_id(info->id);
+ ib_dealloc_pd(sc->ib.pd);
+ rdma_destroy_id(sc->rdma.cm_id);
create_id_failed:
kfree(info);
@@ -1719,34 +1778,39 @@ try_again:
}
/*
- * Receive data from receive reassembly queue
+ * Receive data from the transport's receive reassembly queue
* All the incoming data packets are placed in reassembly queue
- * buf: the buffer to read data into
+ * iter: the buffer to read data into
* size: the length of data to read
* return value: actual data read
- * Note: this implementation copies the data from reassebmly queue to receive
+ *
+ * Note: this implementation copies the data from reassembly queue to receive
* buffers used by upper layer. This is not the optimal code path. A better way
* to do it is to not have upper layer allocate its receive buffers but rather
* borrow the buffer from reassembly queue, and return it after data is
* consumed. But this will require more changes to upper layer code, and also
* need to consider packet boundaries while they still being reassembled.
*/
-static int smbd_recv_buf(struct smbd_connection *info, char *buf,
- unsigned int size)
+int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
+ struct smbdirect_socket *sc = &info->socket;
struct smbd_response *response;
- struct smbd_data_transfer *data_transfer;
+ struct smbdirect_data_transfer *data_transfer;
+ size_t size = iov_iter_count(&msg->msg_iter);
int to_copy, to_read, data_read, offset;
u32 data_length, remaining_data_length, data_offset;
int rc;
+ if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
+ return -EINVAL; /* It's a bug in upper layer to get there */
+
again:
/*
* No need to hold the reassembly queue lock all the time as we are
* the only one reading from the front of the queue. The transport
* may add more entries to the back of the queue at the same time
*/
- log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
+ log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size,
info->reassembly_data_length);
if (info->reassembly_data_length >= size) {
int queue_length;
@@ -1784,7 +1848,10 @@ again:
if (response->first_segment && size == 4) {
unsigned int rfc1002_len =
data_length + remaining_data_length;
- *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
+ __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
+ if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
+ &msg->msg_iter) != sizeof(rfc1002_hdr))
+ return -EFAULT;
data_read = 4;
response->first_segment = false;
log_read(INFO, "returning rfc1002 length %d\n",
@@ -1793,10 +1860,9 @@ again:
}
to_copy = min_t(int, data_length - offset, to_read);
- memcpy(
- buf + data_read,
- (char *)data_transfer + data_offset + offset,
- to_copy);
+ if (copy_to_iter((char *)data_transfer + data_offset + offset,
+ to_copy, &msg->msg_iter) != to_copy)
+ return -EFAULT;
/* move on to the next buffer? */
if (to_copy == data_length - offset) {
@@ -1848,12 +1914,12 @@ read_rfc1002_done:
rc = wait_event_interruptible(
info->wait_reassembly_queue,
info->reassembly_data_length >= size ||
- info->transport_status != SMBD_CONNECTED);
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
/* Don't return any data if interrupted */
if (rc)
return rc;
- if (info->transport_status != SMBD_CONNECTED) {
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
log_read(ERR, "disconnected\n");
return -ECONNABORTED;
}
@@ -1862,89 +1928,6 @@ read_rfc1002_done:
}
/*
- * Receive a page from receive reassembly queue
- * page: the page to read data into
- * to_read: the length of data to read
- * return value: actual data read
- */
-static int smbd_recv_page(struct smbd_connection *info,
- struct page *page, unsigned int page_offset,
- unsigned int to_read)
-{
- int ret;
- char *to_address;
- void *page_address;
-
- /* make sure we have the page ready for read */
- ret = wait_event_interruptible(
- info->wait_reassembly_queue,
- info->reassembly_data_length >= to_read ||
- info->transport_status != SMBD_CONNECTED);
- if (ret)
- return ret;
-
- /* now we can read from reassembly queue and not sleep */
- page_address = kmap_atomic(page);
- to_address = (char *) page_address + page_offset;
-
- log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
- page, to_address, to_read);
-
- ret = smbd_recv_buf(info, to_address, to_read);
- kunmap_atomic(page_address);
-
- return ret;
-}
-
-/*
- * Receive data from transport
- * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
- * return: total bytes read, or 0. SMB Direct will not do partial read.
- */
-int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
-{
- char *buf;
- struct page *page;
- unsigned int to_read, page_offset;
- int rc;
-
- if (iov_iter_rw(&msg->msg_iter) == WRITE) {
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg iter dir %u\n",
- iov_iter_rw(&msg->msg_iter));
- rc = -EINVAL;
- goto out;
- }
-
- switch (iov_iter_type(&msg->msg_iter)) {
- case ITER_KVEC:
- buf = msg->msg_iter.kvec->iov_base;
- to_read = msg->msg_iter.kvec->iov_len;
- rc = smbd_recv_buf(info, buf, to_read);
- break;
-
- case ITER_BVEC:
- page = msg->msg_iter.bvec->bv_page;
- page_offset = msg->msg_iter.bvec->bv_offset;
- to_read = msg->msg_iter.bvec->bv_len;
- rc = smbd_recv_page(info, page, page_offset, to_read);
- break;
-
- default:
- /* It's a bug in upper layer to get there */
- cifs_dbg(VFS, "Invalid msg type %d\n",
- iov_iter_type(&msg->msg_iter));
- rc = -EINVAL;
- }
-
-out:
- /* SMBDirect will read it all or nothing */
- if (rc > 0)
- msg->msg_iter.count = 0;
- return rc;
-}
-
-/*
* Send data to transport
* Each rqst is transported as a SMBDirect payload
* rqst: the data to write
@@ -1954,12 +1937,14 @@ int smbd_send(struct TCP_Server_Info *server,
int num_rqst, struct smb_rqst *rqst_array)
{
struct smbd_connection *info = server->smbd_conn;
+ struct smbdirect_socket *sc = &info->socket;
+ struct smbdirect_socket_parameters *sp = &sc->parameters;
struct smb_rqst *rqst;
struct iov_iter iter;
unsigned int remaining_data_length, klen;
int rc, i, rqst_idx;
- if (info->transport_status != SMBD_CONNECTED)
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
return -EAGAIN;
/*
@@ -1971,10 +1956,10 @@ int smbd_send(struct TCP_Server_Info *server,
for (i = 0; i < num_rqst; i++)
remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
- if (unlikely(remaining_data_length > info->max_fragmented_send_size)) {
+ if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) {
/* assertion: payload never exceeds negotiated maximum */
log_write(ERR, "payload size %d > max size %d\n",
- remaining_data_length, info->max_fragmented_send_size);
+ remaining_data_length, sp->max_fragmented_send_size);
return -EINVAL;
}
@@ -2000,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server,
klen += rqst->rq_iov[i].iov_len;
iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen);
- rc = smbd_post_send_iter(info, &iter, &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length);
if (rc < 0)
break;
if (iov_iter_count(&rqst->rq_iter) > 0) {
/* And then the data pages if there are any */
- rc = smbd_post_send_iter(info, &rqst->rq_iter,
- &remaining_data_length);
+ rc = smbd_post_send_full_iter(info, &rqst->rq_iter,
+ &remaining_data_length);
if (rc < 0)
break;
}
@@ -2053,6 +2038,7 @@ static void smbd_mr_recovery_work(struct work_struct *work)
{
struct smbd_connection *info =
container_of(work, struct smbd_connection, mr_recovery_work);
+ struct smbdirect_socket *sc = &info->socket;
struct smbd_mr *smbdirect_mr;
int rc;
@@ -2070,7 +2056,7 @@ static void smbd_mr_recovery_work(struct work_struct *work)
}
smbdirect_mr->mr = ib_alloc_mr(
- info->pd, info->mr_type,
+ sc->ib.pd, info->mr_type,
info->max_frmr_depth);
if (IS_ERR(smbdirect_mr->mr)) {
log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
@@ -2099,12 +2085,13 @@ static void smbd_mr_recovery_work(struct work_struct *work)
static void destroy_mr_list(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
struct smbd_mr *mr, *tmp;
cancel_work_sync(&info->mr_recovery_work);
list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
if (mr->state == MR_INVALIDATED)
- ib_dma_unmap_sg(info->id->device, mr->sgt.sgl,
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl,
mr->sgt.nents, mr->dir);
ib_dereg_mr(mr->mr);
kfree(mr->sgt.sgl);
@@ -2121,6 +2108,7 @@ static void destroy_mr_list(struct smbd_connection *info)
*/
static int allocate_mr_list(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
int i;
struct smbd_mr *smbdirect_mr, *tmp;
@@ -2136,7 +2124,7 @@ static int allocate_mr_list(struct smbd_connection *info)
smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
if (!smbdirect_mr)
goto cleanup_entries;
- smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
+ smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type,
info->max_frmr_depth);
if (IS_ERR(smbdirect_mr->mr)) {
log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
@@ -2181,20 +2169,20 @@ cleanup_entries:
*/
static struct smbd_mr *get_mr(struct smbd_connection *info)
{
+ struct smbdirect_socket *sc = &info->socket;
struct smbd_mr *ret;
int rc;
again:
rc = wait_event_interruptible(info->wait_mr,
atomic_read(&info->mr_ready_count) ||
- info->transport_status != SMBD_CONNECTED);
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
if (rc) {
log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
return NULL;
}
- if (info->transport_status != SMBD_CONNECTED) {
- log_rdma_mr(ERR, "info->transport_status=%x\n",
- info->transport_status);
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+ log_rdma_mr(ERR, "sc->status=%x\n", sc->status);
return NULL;
}
@@ -2247,6 +2235,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
struct iov_iter *iter,
bool writing, bool need_invalidate)
{
+ struct smbdirect_socket *sc = &info->socket;
struct smbd_mr *smbdirect_mr;
int rc, num_pages;
enum dma_data_direction dir;
@@ -2276,7 +2265,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
num_pages, iov_iter_count(iter), info->max_frmr_depth);
smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth);
- rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl,
+ rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgt.nents, dir);
if (!rc) {
log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
@@ -2312,7 +2301,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
* on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
* on the next ib_post_send when we actually send I/O to remote peer
*/
- rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
+ rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
if (!rc)
return smbdirect_mr;
@@ -2321,7 +2310,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info,
/* If all failed, attempt to recover this MR by setting it MR_ERROR*/
map_mr_error:
- ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl,
+ ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgt.nents, smbdirect_mr->dir);
dma_map_error:
@@ -2359,6 +2348,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
{
struct ib_send_wr *wr;
struct smbd_connection *info = smbdirect_mr->conn;
+ struct smbdirect_socket *sc = &info->socket;
int rc = 0;
if (smbdirect_mr->need_invalidate) {
@@ -2372,7 +2362,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
wr->send_flags = IB_SEND_SIGNALED;
init_completion(&smbdirect_mr->invalidate_done);
- rc = ib_post_send(info->id->qp, wr, NULL);
+ rc = ib_post_send(sc->ib.qp, wr, NULL);
if (rc) {
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
smbd_disconnect_rdma_connection(info);
@@ -2389,7 +2379,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
if (smbdirect_mr->state == MR_INVALIDATED) {
ib_dma_unmap_sg(
- info->id->device, smbdirect_mr->sgt.sgl,
+ sc->ib.dev, smbdirect_mr->sgt.sgl,
smbdirect_mr->sgt.nents,
smbdirect_mr->dir);
smbdirect_mr->state = MR_READY;
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h
index c08e3665150d..3d552ab27e0f 100644
--- a/fs/smb/client/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
@@ -15,6 +15,9 @@
#include <rdma/rdma_cm.h>
#include <linux/mempool.h>
+#include "../common/smbdirect/smbdirect.h"
+#include "../common/smbdirect/smbdirect_socket.h"
+
extern int rdma_readwrite_threshold;
extern int smbd_max_frmr_depth;
extern int smbd_keep_alive_interval;
@@ -50,14 +53,8 @@ enum smbd_connection_status {
* 5. mempools for allocating packets
*/
struct smbd_connection {
- enum smbd_connection_status transport_status;
-
- /* RDMA related */
- struct rdma_cm_id *id;
- struct ib_qp_init_attr qp_attr;
- struct ib_pd *pd;
- struct ib_cq *send_cq, *recv_cq;
- struct ib_device_attr dev_attr;
+ struct smbdirect_socket socket;
+
int ri_rc;
struct completion ri_done;
wait_queue_head_t conn_wait;
@@ -72,15 +69,7 @@ struct smbd_connection {
spinlock_t lock_new_credits_offered;
int new_credits_offered;
- /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */
- int receive_credit_max;
- int send_credit_target;
- int max_send_size;
- int max_fragmented_recv_size;
- int max_fragmented_send_size;
- int max_receive_size;
- int keep_alive_interval;
- int max_readwrite_size;
+ /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */
enum keep_alive_status keep_alive_requested;
int protocol;
atomic_t send_credits;
@@ -177,47 +166,6 @@ enum smbd_message_type {
SMBD_TRANSFER_DATA,
};
-#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
-
-/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */
-struct smbd_negotiate_req {
- __le16 min_version;
- __le16 max_version;
- __le16 reserved;
- __le16 credits_requested;
- __le32 preferred_send_size;
- __le32 max_receive_size;
- __le32 max_fragmented_size;
-} __packed;
-
-/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */
-struct smbd_negotiate_resp {
- __le16 min_version;
- __le16 max_version;
- __le16 negotiated_version;
- __le16 reserved;
- __le16 credits_requested;
- __le16 credits_granted;
- __le32 status;
- __le32 max_readwrite_size;
- __le32 preferred_send_size;
- __le32 max_receive_size;
- __le32 max_fragmented_size;
-} __packed;
-
-/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */
-struct smbd_data_transfer {
- __le16 credits_requested;
- __le16 credits_granted;
- __le16 flags;
- __le16 reserved;
- __le32 remaining_data_length;
- __le32 data_offset;
- __le32 data_length;
- __le32 padding;
- __u8 buffer[];
-} __packed;
-
/* The packet fields for a registered RDMA buffer */
struct smbd_buffer_descriptor_v1 {
__le64 offset;
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 52bcb55d9952..93e5b2bb9f28 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->rreq_debug_id, __entry->rreq_debug_index,
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
@@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->offset, __entry->len, __entry->rc)
)
@@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class,
__entry->len = len;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc)
)
@@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class,
__entry->target_offset = target_offset;
__entry->len = len;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x",
__entry->xid, __entry->sesid, __entry->tid, __entry->target_fid,
__entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len)
)
@@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class,
__entry->tid = tid;
__entry->sesid = sesid;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid)
)
@@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class,
__entry->sesid = sesid;
__entry->rc = rc;
),
- TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
__entry->rc)
)
@@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class,
__entry->status = status;
__entry->rc = rc;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
__entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
__entry->status, __entry->rc)
)
@@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class,
__entry->cmd = cmd;
__entry->mid = mid;
),
- TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+ TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu",
__entry->sesid, __entry->tid,
__entry->cmd, __entry->mid)
)
@@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class,
__entry->when_sent = when_sent;
__entry->when_received = when_received;
),
- TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
+ TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu",
__entry->cmd, __entry->mid, __entry->pid, __entry->when_sent,
__entry->when_received)
)
@@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
__assign_str(func_name);
__entry->rc = rc;
),
- TP_printk("\t%s: xid=%u rc=%d",
+ TP_printk("%s: xid=%u rc=%d",
__get_str(func_name), __entry->xid, __entry->rc)
)
@@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class,
__entry->ino = ino;
__entry->rc = rc;
),
- TP_printk("\tino=%lu rc=%d",
+ TP_printk("ino=%lu rc=%d",
__entry->ino, __entry->rc)
)
@@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
__entry->xid = xid;
__assign_str(func_name);
),
- TP_printk("\t%s: xid=%u",
+ TP_printk("%s: xid=%u",
__get_str(func_name), __entry->xid)
)
diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h
new file mode 100644
index 000000000000..b9a385344ff3
--- /dev/null
+++ b/fs/smb/common/smbdirect/smbdirect.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2017, Microsoft Corporation.
+ * Copyright (C) 2018, LG Electronics.
+ */
+
+#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__
+#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__
+
+/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */
+struct smbdirect_buffer_descriptor_v1 {
+ __le64 offset;
+ __le32 token;
+ __le32 length;
+} __packed;
+
+/*
+ * Connection parameters mostly from [MS-SMBD] 3.1.1.1
+ *
+ * These are setup and negotiated at the beginning of a
+ * connection and remain constant unless explicitly changed.
+ *
+ * Some values are important for the upper layer.
+ */
+struct smbdirect_socket_parameters {
+ __u16 recv_credit_max;
+ __u16 send_credit_target;
+ __u32 max_send_size;
+ __u32 max_fragmented_send_size;
+ __u32 max_recv_size;
+ __u32 max_fragmented_recv_size;
+ __u32 max_read_write_size;
+ __u32 keepalive_interval_msec;
+ __u32 keepalive_timeout_msec;
+} __packed;
+
+#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */
diff --git a/fs/smb/common/smbdirect/smbdirect_pdu.h b/fs/smb/common/smbdirect/smbdirect_pdu.h
new file mode 100644
index 000000000000..ae9fdb05ce23
--- /dev/null
+++ b/fs/smb/common/smbdirect/smbdirect_pdu.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2017 Stefan Metzmacher
+ */
+
+#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__
+#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__
+
+#define SMBDIRECT_V1 0x0100
+
+/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */
+struct smbdirect_negotiate_req {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */
+struct smbdirect_negotiate_resp {
+ __le16 min_version;
+ __le16 max_version;
+ __le16 negotiated_version;
+ __le16 reserved;
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le32 status;
+ __le32 max_readwrite_size;
+ __le32 preferred_send_size;
+ __le32 max_receive_size;
+ __le32 max_fragmented_size;
+} __packed;
+
+#define SMBDIRECT_DATA_MIN_HDR_SIZE 0x14
+#define SMBDIRECT_DATA_OFFSET 0x18
+
+#define SMBDIRECT_FLAG_RESPONSE_REQUESTED 0x0001
+
+/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */
+struct smbdirect_data_transfer {
+ __le16 credits_requested;
+ __le16 credits_granted;
+ __le16 flags;
+ __le16 reserved;
+ __le32 remaining_data_length;
+ __le32 data_offset;
+ __le32 data_length;
+ __le32 padding;
+ __u8 buffer[];
+} __packed;
+
+#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ */
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
new file mode 100644
index 000000000000..e5b15cc44a7b
--- /dev/null
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025 Stefan Metzmacher
+ */
+
+#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
+#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__
+
+enum smbdirect_socket_status {
+ SMBDIRECT_SOCKET_CREATED,
+ SMBDIRECT_SOCKET_CONNECTING,
+ SMBDIRECT_SOCKET_CONNECTED,
+ SMBDIRECT_SOCKET_NEGOTIATE_FAILED,
+ SMBDIRECT_SOCKET_DISCONNECTING,
+ SMBDIRECT_SOCKET_DISCONNECTED,
+ SMBDIRECT_SOCKET_DESTROYED
+};
+
+struct smbdirect_socket {
+ enum smbdirect_socket_status status;
+
+ /* RDMA related */
+ struct {
+ struct rdma_cm_id *cm_id;
+ } rdma;
+
+ /* IB verbs related */
+ struct {
+ struct ib_pd *pd;
+ struct ib_cq *send_cq;
+ struct ib_cq *recv_cq;
+
+ /*
+ * shortcuts for rdma.cm_id->{qp,device};
+ */
+ struct ib_qp *qp;
+ struct ib_device *dev;
+ } ib;
+
+ struct smbdirect_socket_parameters parameters;
+};
+
+#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 572102098c10..dd3e0e3f7bf0 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -108,6 +108,7 @@ struct ksmbd_conn {
__le16 signing_algorithm;
bool binding;
atomic_t refcnt;
+ bool is_aapl;
};
struct ksmbd_conn_ops {
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index c6b990c93bfa..f1c7ed1a6ca5 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -2875,7 +2875,7 @@ int smb2_open(struct ksmbd_work *work)
int req_op_level = 0, open_flags = 0, may_flags = 0, file_info = 0;
int rc = 0;
int contxt_cnt = 0, query_disk_id = 0;
- int maximal_access_ctxt = 0, posix_ctxt = 0;
+ bool maximal_access_ctxt = false, posix_ctxt = false;
int s_type = 0;
int next_off = 0;
char *name = NULL;
@@ -2904,6 +2904,27 @@ int smb2_open(struct ksmbd_work *work)
return create_smb2_pipe(work);
}
+ if (req->CreateContextsOffset && tcon->posix_extensions) {
+ context = smb2_find_context_vals(req, SMB2_CREATE_TAG_POSIX, 16);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out2;
+ } else if (context) {
+ struct create_posix *posix = (struct create_posix *)context;
+
+ if (le16_to_cpu(context->DataOffset) +
+ le32_to_cpu(context->DataLength) <
+ sizeof(struct create_posix) - 4) {
+ rc = -EINVAL;
+ goto err_out2;
+ }
+ ksmbd_debug(SMB, "get posix context\n");
+
+ posix_mode = le32_to_cpu(posix->Mode);
+ posix_ctxt = true;
+ }
+ }
+
if (req->NameLength) {
name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset),
le16_to_cpu(req->NameLength),
@@ -2926,9 +2947,11 @@ int smb2_open(struct ksmbd_work *work)
goto err_out2;
}
- rc = ksmbd_validate_filename(name);
- if (rc < 0)
- goto err_out2;
+ if (posix_ctxt == false) {
+ rc = ksmbd_validate_filename(name);
+ if (rc < 0)
+ goto err_out2;
+ }
if (ksmbd_share_veto_filename(share, name)) {
rc = -ENOENT;
@@ -3086,28 +3109,6 @@ int smb2_open(struct ksmbd_work *work)
rc = -EBADF;
goto err_out2;
}
-
- if (tcon->posix_extensions) {
- context = smb2_find_context_vals(req,
- SMB2_CREATE_TAG_POSIX, 16);
- if (IS_ERR(context)) {
- rc = PTR_ERR(context);
- goto err_out2;
- } else if (context) {
- struct create_posix *posix =
- (struct create_posix *)context;
- if (le16_to_cpu(context->DataOffset) +
- le32_to_cpu(context->DataLength) <
- sizeof(struct create_posix) - 4) {
- rc = -EINVAL;
- goto err_out2;
- }
- ksmbd_debug(SMB, "get posix context\n");
-
- posix_mode = le32_to_cpu(posix->Mode);
- posix_ctxt = 1;
- }
- }
}
if (ksmbd_override_fsids(work)) {
@@ -3540,6 +3541,15 @@ int smb2_open(struct ksmbd_work *work)
ksmbd_debug(SMB, "get query on disk id context\n");
query_disk_id = 1;
}
+
+ if (conn->is_aapl == false) {
+ context = smb2_find_context_vals(req, SMB2_CREATE_AAPL, 4);
+ if (IS_ERR(context)) {
+ rc = PTR_ERR(context);
+ goto err_out1;
+ } else if (context)
+ conn->is_aapl = true;
+ }
}
rc = ksmbd_vfs_getattr(&path, &stat);
@@ -3979,7 +3989,10 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
if (dinfo->EaSize)
dinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
dinfo->Reserved = 0;
- dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ if (conn->is_aapl)
+ dinfo->UniqueId = 0;
+ else
+ dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
if (d_info->hide_dot_file && d_info->name[0] == '.')
dinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
memcpy(dinfo->FileName, conv_name, conv_len);
@@ -3996,7 +4009,10 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
if (fibdinfo->EaSize)
fibdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
- fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
+ if (conn->is_aapl)
+ fibdinfo->UniqueId = 0;
+ else
+ fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
fibdinfo->ShortNameLength = 0;
fibdinfo->Reserved = 0;
fibdinfo->Reserved2 = cpu_to_le16(0);
@@ -8519,11 +8535,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
-
rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
rsp->Reserved = 0;
@@ -8531,16 +8542,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
rsp->VolatileFid = volatile_id;
rsp->PersistentFid = persistent_id;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
-
opinfo_put(opinfo);
ksmbd_fd_put(work, fp);
- smb2_set_err_rsp(work);
}
static int check_lease_state(struct lease *lease, __le32 req_state)
@@ -8670,11 +8680,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
}
lease_state = lease->state;
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- atomic_dec(&opinfo->breaking_cnt);
- wake_up_interruptible_all(&opinfo->oplock_brk);
- opinfo_put(opinfo);
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
@@ -8683,16 +8688,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
rsp->LeaseState = lease_state;
rsp->LeaseDuration = 0;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
+ opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
-
opinfo_put(opinfo);
- smb2_set_err_rsp(work);
}
/**
diff --git a/fs/smb/server/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 17a0b18a8406..16ae8a10490b 100644
--- a/fs/smb/server/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
@@ -63,6 +63,9 @@ struct preauth_integrity_info {
#define SMB2_SESSION_TIMEOUT (10 * HZ)
+/* Apple Defined Contexts */
+#define SMB2_CREATE_AAPL "AAPL"
+
struct create_durable_req_v2 {
struct create_context_hdr ccontext;
__u8 Name[8];
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 64a428a06ace..c6cbe0d56e32 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -433,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
- ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
ksmbd_debug(RDMA, "drain the reassembly queue\n");
@@ -1940,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
return 0;
err:
if (t->qp) {
- ib_destroy_qp(t->qp);
t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
if (t->recv_cq) {
ib_destroy_cq(t->recv_cq);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index baf0d3031a44..134cabdd60eb 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -1280,6 +1280,7 @@ out1:
err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
if (err) {
+ mnt_drop_write(parent_path->mnt);
path_put(path);
path_put(parent_path);
}
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
index e9d76bcdc820..20ad7c309489 100644
--- a/fs/xfs/libxfs/xfs_group.c
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -163,7 +163,8 @@ xfs_group_free(
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
if (uninit)
@@ -189,9 +190,11 @@ xfs_group_insert(
xg->xg_type = type;
#ifdef __KERNEL__
- xg->xg_busy_extents = xfs_extent_busy_alloc();
- if (!xg->xg_busy_extents)
- return -ENOMEM;
+ if (xfs_group_has_extent_busy(mp, type)) {
+ xg->xg_busy_extents = xfs_extent_busy_alloc();
+ if (!xg->xg_busy_extents)
+ return -ENOMEM;
+ }
spin_lock_init(&xg->xg_state_lock);
xfs_hooks_init(&xg->xg_rmap_update_hooks);
#endif
@@ -210,7 +213,8 @@ xfs_group_insert(
out_drain:
xfs_defer_drain_free(&xg->xg_intents_drain);
#ifdef __KERNEL__
- kfree(xg->xg_busy_extents);
+ if (xfs_group_has_extent_busy(xg->xg_mount, xg->xg_type))
+ kfree(xg->xg_busy_extents);
#endif
return error;
}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index f069b04e8ea1..3e6e019b6146 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -68,4 +68,12 @@ static inline void xfs_extent_busy_sort(struct list_head *list)
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
}
+/*
+ * Zoned RTGs don't need to track busy extents, as the actual block freeing only
+ * happens by a zone reset, which forces out all transactions that touched the
+ * to be reset zone first.
+ */
+#define xfs_group_has_extent_busy(mp, type) \
+ ((type) == XG_TYPE_AG || !xfs_has_zoned((mp)))
+
#endif /* __XFS_EXTENT_BUSY_H__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6484c596ecea..736eb0924573 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom(
kfree(nmp);
+ trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
+
if (min_logfsbs > mp->m_sb.sb_logblocks)
return -EINVAL;