summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c25
-rw-r--r--fs/afs/dir.c10
-rw-r--r--fs/afs/write.c18
-rw-r--r--fs/btrfs/backref.c6
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/delayed-ref.c4
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/qgroup.c38
-rw-r--r--fs/btrfs/qgroup.h2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c20
-rw-r--r--fs/btrfs/tree-log.c31
-rw-r--r--fs/btrfs/zoned.c12
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/cifs/cifssmb.c10
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/cifs/dfs_cache.c229
-rw-r--r--fs/cifs/dfs_cache.h3
-rw-r--r--fs/cifs/fs_context.c7
-rw-r--r--fs/cifs/smb2ops.c49
-rw-r--r--fs/fs-writeback.c3
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/io-wq.c7
-rw-r--r--fs/io_uring.c55
-rw-r--r--fs/seq_file.c3
-rw-r--r--fs/userfaultfd.c26
28 files changed, 416 insertions, 163 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index d3c6bb22c5f4..a3f5de28be79 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -29,16 +29,11 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
static int afs_deliver_yfs_cb_callback(struct afs_call *);
-#define CM_NAME(name) \
- char afs_SRXCB##name##_name[] __tracepoint_string = \
- "CB." #name
-
/*
* CB.CallBack operation type
*/
-static CM_NAME(CallBack);
static const struct afs_call_type afs_SRXCBCallBack = {
- .name = afs_SRXCBCallBack_name,
+ .name = "CB.CallBack",
.deliver = afs_deliver_cb_callback,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_CallBack,
@@ -47,9 +42,8 @@ static const struct afs_call_type afs_SRXCBCallBack = {
/*
* CB.InitCallBackState operation type
*/
-static CM_NAME(InitCallBackState);
static const struct afs_call_type afs_SRXCBInitCallBackState = {
- .name = afs_SRXCBInitCallBackState_name,
+ .name = "CB.InitCallBackState",
.deliver = afs_deliver_cb_init_call_back_state,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_InitCallBackState,
@@ -58,9 +52,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = {
/*
* CB.InitCallBackState3 operation type
*/
-static CM_NAME(InitCallBackState3);
static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
- .name = afs_SRXCBInitCallBackState3_name,
+ .name = "CB.InitCallBackState3",
.deliver = afs_deliver_cb_init_call_back_state3,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_InitCallBackState,
@@ -69,9 +62,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
/*
* CB.Probe operation type
*/
-static CM_NAME(Probe);
static const struct afs_call_type afs_SRXCBProbe = {
- .name = afs_SRXCBProbe_name,
+ .name = "CB.Probe",
.deliver = afs_deliver_cb_probe,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_Probe,
@@ -80,9 +72,8 @@ static const struct afs_call_type afs_SRXCBProbe = {
/*
* CB.ProbeUuid operation type
*/
-static CM_NAME(ProbeUuid);
static const struct afs_call_type afs_SRXCBProbeUuid = {
- .name = afs_SRXCBProbeUuid_name,
+ .name = "CB.ProbeUuid",
.deliver = afs_deliver_cb_probe_uuid,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_ProbeUuid,
@@ -91,9 +82,8 @@ static const struct afs_call_type afs_SRXCBProbeUuid = {
/*
* CB.TellMeAboutYourself operation type
*/
-static CM_NAME(TellMeAboutYourself);
static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
- .name = afs_SRXCBTellMeAboutYourself_name,
+ .name = "CB.TellMeAboutYourself",
.deliver = afs_deliver_cb_tell_me_about_yourself,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_TellMeAboutYourself,
@@ -102,9 +92,8 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
/*
* YFS CB.CallBack operation type
*/
-static CM_NAME(YFS_CallBack);
static const struct afs_call_type afs_SRXYFSCB_CallBack = {
- .name = afs_SRXCBYFS_CallBack_name,
+ .name = "YFSCB.CallBack",
.deliver = afs_deliver_yfs_cb_callback,
.destructor = afs_cm_destructor,
.work = SRXAFSCB_CallBack,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 78719f2f567e..ac829e63c570 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -656,7 +656,6 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
return ret;
}
- ret = -ENOENT;
if (!cookie.found) {
_leave(" = -ENOENT [not found]");
return -ENOENT;
@@ -2020,17 +2019,20 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
if (d_count(new_dentry) > 2) {
/* copy the target dentry's name */
- ret = -ENOMEM;
op->rename.tmp = d_alloc(new_dentry->d_parent,
&new_dentry->d_name);
- if (!op->rename.tmp)
+ if (!op->rename.tmp) {
+ op->error = -ENOMEM;
goto error;
+ }
ret = afs_sillyrename(new_dvnode,
AFS_FS_I(d_inode(new_dentry)),
new_dentry, op->key);
- if (ret)
+ if (ret) {
+ op->error = ret;
goto error;
+ }
op->dentry_2 = op->rename.tmp;
op->rename.rehash = NULL;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3104b62c2082..c0534697268e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -771,14 +771,20 @@ int afs_writepages(struct address_space *mapping,
if (wbc->range_cyclic) {
start = mapping->writeback_index * PAGE_SIZE;
ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
- if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
- ret = afs_writepages_region(mapping, wbc, 0, start,
- &next);
- mapping->writeback_index = next / PAGE_SIZE;
+ if (ret == 0) {
+ mapping->writeback_index = next / PAGE_SIZE;
+ if (start > 0 && wbc->nr_to_write > 0) {
+ ret = afs_writepages_region(mapping, wbc, 0,
+ start, &next);
+ if (ret == 0)
+ mapping->writeback_index =
+ next / PAGE_SIZE;
+ }
+ }
} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
- if (wbc->nr_to_write > 0)
- mapping->writeback_index = next;
+ if (wbc->nr_to_write > 0 && ret == 0)
+ mapping->writeback_index = next / PAGE_SIZE;
} else {
ret = afs_writepages_region(mapping, wbc,
wbc->range_start, wbc->range_end, &next);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7a8a2fc19533..78b202d198b8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
- bool ignore_offset)
+ bool ignore_offset, bool skip_commit_root_sem)
{
int ret;
- if (!trans)
+ if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
time_seq, roots, ignore_offset);
- if (!trans)
+ if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 17abde7f794c..ff5f07f9940b 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots, bool ignore_offset);
+ u64 time_seq, struct ulist **roots, bool ignore_offset,
+ bool skip_commit_root_sem);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 06bc842ecdb3..ca848b183474 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
if (qrecord_inserted)
- btrfs_qgroup_trace_extent_post(fs_info, record);
+ btrfs_qgroup_trace_extent_post(trans, record);
return 0;
}
@@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return btrfs_qgroup_trace_extent_post(trans, record);
return 0;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d296483d148f..268ce58d4569 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+ continue;
+
ret = btrfs_trim_free_extents(device, &group_trimmed);
if (ret) {
dev_failed++;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8f60314c36c5..0117d867ecf8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- if (ordered_extent->disk)
+ if (ordered_extent->bdev)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_free_io_failure_record(inode, start, end);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6eb41b7c0c84..5c0f8481e25e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
entry->physical = (u64)-1;
- entry->disk = NULL;
- entry->partno = (u8)-1;
ASSERT(type == BTRFS_ORDERED_REGULAR ||
type == BTRFS_ORDERED_NOCOW ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 566472004edd..b2d88aba8420 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -145,8 +145,7 @@ struct btrfs_ordered_extent {
* command in a workqueue context
*/
u64 physical;
- struct gendisk *disk;
- u8 partno;
+ struct block_device *bdev;
};
/*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 07ec06d4e972..0fa121171ca1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_extent_record *qrecord)
{
struct ulist *old_root;
u64 bytenr = qrecord->bytenr;
int ret;
- ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+ /*
+ * We are always called in a context where we are already holding a
+ * transaction handle. Often we are called when adding a data delayed
+ * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+ * in which case we will be holding a write lock on extent buffer from a
+ * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+ * acquire fs_info->commit_root_sem, because that is a higher level lock
+ * that must be acquired before locking any extent buffers.
+ *
+ * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+ * but we can't pass it a non-NULL transaction handle, because otherwise
+ * it would not use commit roots and would lock extent buffers, causing
+ * a deadlock if it ends up trying to read lock the same extent buffer
+ * that was previously write locked at btrfs_truncate_inode_items().
+ *
+ * So pass a NULL transaction handle to btrfs_find_all_roots() and
+ * explicitly tell it to not acquire the commit_root_sem - if we are
+ * holding a transaction handle we don't need its protection.
+ */
+ ASSERT(trans != NULL);
+
+ ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+ false, true);
if (ret < 0) {
- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- btrfs_warn(fs_info,
+ trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_warn(trans->fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
ret);
return 0;
@@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
kfree(record);
return 0;
}
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return btrfs_qgroup_trace_extent_post(trans, record);
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
- &record->old_roots, false);
+ &record->old_roots, false, false);
if (ret < 0)
goto cleanup;
}
@@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
* current root. It's safe inside commit_transaction().
*/
ret = btrfs_find_all_roots(trans, fs_info,
- record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+ record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
if (ret < 0)
goto cleanup;
if (qgroup_to_skip) {
@@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
num_bytes = found.offset;
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
- &roots, false);
+ &roots, false, false);
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 7283e4f549af..880e9df0dac1 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock(
* using current root, then we can move all expensive backref walk out of
* transaction committing, but not now as qgroup accounting will be wrong again.
*/
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_extent_record *qrecord);
/*
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index f3137285a9e2..98b5aaba46f1 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
* quota.
*/
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
new_roots = NULL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return -EINVAL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root,
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root,
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root,
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
@@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root,
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index dc6eb088d73e..9fd0348be7f5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5526,16 +5526,29 @@ log_extents:
spin_lock(&inode->lock);
inode->logged_trans = trans->transid;
/*
- * Don't update last_log_commit if we logged that an inode exists
- * after it was loaded to memory (full_sync bit set).
- * This is to prevent data loss when we do a write to the inode,
- * then the inode gets evicted after all delalloc was flushed,
- * then we log it exists (due to a rename for example) and then
- * fsync it. This last fsync would do nothing (not logging the
- * extents previously written).
+ * Don't update last_log_commit if we logged that an inode exists.
+ * We do this for two reasons:
+ *
+ * 1) We might have had buffered writes to this inode that were
+ * flushed and had their ordered extents completed in this
+ * transaction, but we did not previously log the inode with
+ * LOG_INODE_ALL. Later the inode was evicted and after that
+ * it was loaded again and this LOG_INODE_EXISTS log operation
+ * happened. We must make sure that if an explicit fsync against
+ * the inode is performed later, it logs the new extents, an
+ * updated inode item, etc, and syncs the log. The same logic
+ * applies to direct IO writes instead of buffered writes.
+ *
+ * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+ * is logged with an i_size of 0 or whatever value was logged
+ * before. If later the i_size of the inode is increased by a
+ * truncate operation, the log is synced through an fsync of
+ * some other inode and then finally an explicit fsync against
+ * this inode is made, we must make sure this fsync logs the
+ * inode with the new i_size, the hole between old i_size and
+ * the new i_size, and syncs the log.
*/
- if (inode_only != LOG_INODE_EXISTS ||
- !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ if (inode_only != LOG_INODE_EXISTS)
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 297c0b1c0634..907c2cc45c9c 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
return;
ordered->physical = physical;
- ordered->disk = bio->bi_bdev->bd_disk;
- ordered->partno = bio->bi_bdev->bd_partno;
+ ordered->bdev = bio->bi_bdev;
btrfs_put_ordered_extent(ordered);
}
@@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
struct extent_map_tree *em_tree;
struct extent_map *em;
struct btrfs_ordered_sum *sum;
- struct block_device *bdev;
u64 orig_logical = ordered->disk_bytenr;
u64 *logical = NULL;
int nr, stripe_len;
/* Zoned devices should not have partitions. So, we can assume it is 0 */
- ASSERT(ordered->partno == 0);
- bdev = bdgrab(ordered->disk->part0);
- if (WARN_ON(!bdev))
+ ASSERT(!bdev_is_partition(ordered->bdev));
+ if (WARN_ON(!ordered->bdev))
return;
- if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
+ if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
ordered->physical, &logical, &nr,
&stripe_len)))
goto out;
@@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
out:
kfree(logical);
- bdput(bdev);
}
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a818213c972f..9db1b39df773 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s)
break;
case CEPH_MDS_SESSION_CLOSING:
/* Should never reach this when we're unmounting */
- WARN_ON_ONCE(true);
+ WARN_ON_ONCE(s->s_ttl);
fallthrough;
case CEPH_MDS_SESSION_NEW:
case CEPH_MDS_SESSION_RESTARTING:
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f72e3b3dca69..65d1a65bfc37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -873,8 +873,11 @@ PsxDelete:
InformationLevel) - 4;
offset = param_offset + params;
- /* Setup pointer to Request Data (inode type) */
- pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset);
+ /* Setup pointer to Request Data (inode type).
+ * Note that SMB offsets are from the beginning of SMB which is 4 bytes
+ * in, after RFC1001 field
+ */
+ pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4);
pRqD->type = cpu_to_le16(type);
pSMB->ParameterOffset = cpu_to_le16(param_offset);
pSMB->DataOffset = cpu_to_le16(offset);
@@ -1081,7 +1084,8 @@ PsxCreat:
param_offset = offsetof(struct smb_com_transaction2_spi_req,
InformationLevel) - 4;
offset = param_offset + params;
- pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
+ /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */
+ pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4);
pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
pdata->Permissions = cpu_to_le64(mode);
pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1b04d6ec14dd..3781eee9360a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
#ifdef CONFIG_CIFS_DFS_UPCALL
struct super_block *sb = NULL;
struct cifs_sb_info *cifs_sb = NULL;
- struct dfs_cache_tgt_list tgt_list = {0};
+ struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
struct dfs_cache_tgt_iterator *tgt_it = NULL;
#endif
@@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
{
int rc;
char *npath = NULL;
- struct dfs_cache_tgt_list tgt_list = {0};
+ struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
struct dfs_cache_tgt_iterator *tgt_it = NULL;
struct smb3_fs_context tmp_ctx = {NULL};
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 7c1769714609..283745592844 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -19,6 +19,7 @@
#include "cifs_debug.h"
#include "cifs_unicode.h"
#include "smb2glob.h"
+#include "dns_resolve.h"
#include "dfs_cache.h"
@@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
err_free_it:
list_for_each_entry_safe(it, nit, head, it_list) {
+ list_del(&it->it_list);
kfree(it->it_name);
kfree(it);
}
@@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
return 0;
}
+static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2)
+{
+ char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0};
+ const char *host;
+ size_t hostlen;
+ char *ip = NULL;
+ struct sockaddr sa;
+ bool match;
+ int rc;
+
+ if (strcasecmp(s1, s2))
+ return false;
+
+ /*
+ * Resolve share's hostname and check if server address matches. Otherwise just ignore it
+ * as we could not have upcall to resolve hostname or failed to convert ip address.
+ */
+ match = true;
+ extract_unc_hostname(s1, &host, &hostlen);
+ scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
+
+ rc = dns_resolve_server_name_to_ip(unc, &ip, NULL);
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n",
+ __func__, (int)hostlen, host);
+ return true;
+ }
+
+ if (!cifs_convert_address(&sa, ip, strlen(ip))) {
+ cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n",
+ __func__, ip);
+ } else {
+ mutex_lock(&server->srv_mutex);
+ match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa);
+ mutex_unlock(&server->srv_mutex);
+ }
+
+ kfree(ip);
+ return match;
+}
+
+/*
+ * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
+ * target shares in @refs.
+ */
+static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
+ const struct dfs_info3_param *refs, int numrefs)
+{
+ struct dfs_cache_tgt_iterator *it;
+ int i;
+
+ for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
+ for (i = 0; i < numrefs; i++) {
+ if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
+ refs[i].node_name))
+ return;
+ }
+ }
+
+ cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
+ for (i = 0; i < tcon->ses->chan_count; i++) {
+ spin_lock(&GlobalMid_Lock);
+ if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+ tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&GlobalMid_Lock);
+ }
+}
+
+/* Refresh dfs referral of tcon and mark it for reconnect if needed */
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+ const char *path = tcon->dfs_path + 1;
+ struct cifs_ses *ses;
+ struct cache_entry *ce;
+ struct dfs_info3_param *refs = NULL;
+ int numrefs = 0;
+ bool needs_refresh = false;
+ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+ int rc = 0;
+ unsigned int xid;
+
+ ses = find_ipc_from_server_path(sessions, path);
+ if (IS_ERR(ses)) {
+ cifs_dbg(FYI, "%s: could not find ipc session\n", __func__);
+ return PTR_ERR(ses);
+ }
+
+ down_read(&htable_rw_lock);
+ ce = lookup_cache_entry(path);
+ needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
+ if (!IS_ERR(ce)) {
+ rc = get_targets(ce, &tl);
+ if (rc)
+ cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+ }
+ up_read(&htable_rw_lock);
+
+ if (!needs_refresh) {
+ rc = 0;
+ goto out;
+ }
+
+ xid = get_xid();
+ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+ free_xid(xid);
+
+ /* Create or update a cache entry with the new referral */
+ if (!rc) {
+ dump_refs(refs, numrefs);
+
+ down_write(&htable_rw_lock);
+ ce = lookup_cache_entry(path);
+ if (IS_ERR(ce))
+ add_cache_entry_locked(refs, numrefs);
+ else if (force_refresh || cache_entry_expired(ce))
+ update_cache_entry_locked(ce, refs, numrefs);
+ up_write(&htable_rw_lock);
+
+ mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+ }
+
+out:
+ dfs_cache_free_tgts(&tl);
+ free_dfs_info_array(refs, numrefs);
+ return rc;
+}
+
+/**
+ * dfs_cache_remount_fs - remount a DFS share
+ *
+ * Reconfigure dfs mount by forcing a new DFS referral and if the currently cached targets do not
+ * match any of the new targets, mark it for reconnect.
+ *
+ * @cifs_sb: cifs superblock.
+ *
+ * Return zero if remounted, otherwise non-zero.
+ */
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+{
+ struct cifs_tcon *tcon;
+ struct mount_group *mg;
+ struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+ int rc;
+
+ if (!cifs_sb || !cifs_sb->master_tlink)
+ return -EINVAL;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ if (!tcon->dfs_path) {
+ cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+ return 0;
+ }
+
+ if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
+ cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&mount_group_list_lock);
+ mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
+ if (IS_ERR(mg)) {
+ mutex_unlock(&mount_group_list_lock);
+ cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+ return PTR_ERR(mg);
+ }
+ kref_get(&mg->refcount);
+ mutex_unlock(&mount_group_list_lock);
+
+ spin_lock(&mg->lock);
+ memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0]));
+ spin_unlock(&mg->lock);
+
+ /*
+ * After reconnecting to a different server, unique ids won't match anymore, so we disable
+ * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
+ */
+ cifs_autodisable_serverino(cifs_sb);
+ /*
+ * Force the use of prefix path to support failover on DFS paths that resolve to targets
+ * that have different prefix paths.
+ */
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = refresh_tcon(sessions, tcon, true);
+
+ kref_put(&mg->refcount, mount_group_release);
+ return rc;
+}
+
/*
* Refresh all active dfs mounts regardless of whether they are in cache or not.
* (cache can be cleared)
@@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions)
struct cifs_ses *ses;
struct cifs_tcon *tcon, *ntcon;
struct list_head tcons;
- unsigned int xid;
INIT_LIST_HEAD(&tcons);
@@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions)
spin_unlock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
- const char *path = tcon->dfs_path + 1;
- struct cache_entry *ce;
- struct dfs_info3_param *refs = NULL;
- int numrefs = 0;
- bool needs_refresh = false;
- int rc = 0;
-
list_del_init(&tcon->ulist);
-
- ses = find_ipc_from_server_path(sessions, path);
- if (IS_ERR(ses))
- goto next_tcon;
-
- down_read(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- needs_refresh = IS_ERR(ce) || cache_entry_expired(ce);
- up_read(&htable_rw_lock);
-
- if (!needs_refresh)
- goto next_tcon;
-
- xid = get_xid();
- rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- free_xid(xid);
-
- /* Create or update a cache entry with the new referral */
- if (!rc) {
- down_write(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- if (IS_ERR(ce))
- add_cache_entry_locked(refs, numrefs);
- else if (cache_entry_expired(ce))
- update_cache_entry_locked(ce, refs, numrefs);
- up_write(&htable_rw_lock);
- }
-
-next_tcon:
- free_dfs_info_array(refs, numrefs);
+ refresh_tcon(sessions, tcon, false);
cifs_put_tcon(tcon);
}
}
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index b29d3ae64829..52070d1df189 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -13,6 +13,8 @@
#include <linux/uuid.h>
#include "cifsglob.h"
+#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), }
+
struct dfs_cache_tgt_list {
int tl_numtgts;
struct list_head tl_list;
@@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
static inline struct dfs_cache_tgt_iterator *
dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 553adfbcc22a..9a59d7ff9a11 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -13,6 +13,9 @@
#include <linux/magic.h>
#include <linux/security.h>
#include <net/net_namespace.h>
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
*/
#include <linux/ctype.h>
@@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc)
smb3_cleanup_fs_context_contents(cifs_sb->ctx);
rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
smb3_update_mnt_flags(cifs_sb);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (!rc)
+ rc = dfs_cache_remount_fs(cifs_sb);
+#endif
return rc;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ba3c58e1f725..23d6f4d71649 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3617,7 +3617,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
char *buf)
{
struct cifs_io_parms io_parms = {0};
- int nbytes;
+ int rc, nbytes;
struct kvec iov[2];
io_parms.netfid = cfile->fid.netfid;
@@ -3625,13 +3625,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
io_parms.tcon = tcon;
io_parms.persistent_fid = cfile->fid.persistent_fid;
io_parms.volatile_fid = cfile->fid.volatile_fid;
- io_parms.offset = off;
- io_parms.length = len;
- /* iov[0] is reserved for smb header */
- iov[1].iov_base = buf;
- iov[1].iov_len = io_parms.length;
- return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ while (len) {
+ io_parms.offset = off;
+ io_parms.length = len;
+ if (io_parms.length > SMB2_MAX_BUFFER_SIZE)
+ io_parms.length = SMB2_MAX_BUFFER_SIZE;
+ /* iov[0] is reserved for smb header */
+ iov[1].iov_base = buf;
+ iov[1].iov_len = io_parms.length;
+ rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ if (rc)
+ break;
+ if (nbytes > len)
+ return -EINVAL;
+ buf += nbytes;
+ off += nbytes;
+ len -= nbytes;
+ }
+ return rc;
}
static int smb3_simple_fallocate_range(unsigned int xid,
@@ -3655,11 +3667,6 @@ static int smb3_simple_fallocate_range(unsigned int xid,
(char **)&out_data, &out_data_len);
if (rc)
goto out;
- /*
- * It is already all allocated
- */
- if (out_data_len == 0)
- goto out;
buf = kzalloc(1024 * 1024, GFP_KERNEL);
if (buf == NULL) {
@@ -3782,6 +3789,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
goto out;
}
+ if (keep_size == true) {
+ /*
+ * We can not preallocate pages beyond the end of the file
+ * in SMB2
+ */
+ if (off >= i_size_read(inode)) {
+ rc = 0;
+ goto out;
+ }
+ /*
+ * For fallocates that are partially beyond the end of file,
+ * clamp len so we only fallocate up to the end of file.
+ */
+ if (off + len > i_size_read(inode)) {
+ len = i_size_read(inode) - off;
+ }
+ }
+
if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
/*
* At this point, we are trying to fallocate an internal
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06d04a74ab6c..4c3370548982 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
*/
smp_mb();
+ if (IS_DAX(inode))
+ return false;
+
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 926eeb9bf4eb..cdfb1ae78a3f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -77,7 +77,7 @@ enum hugetlb_param {
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
fsparam_u32 ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
- fsparam_u32 ("mode", Opt_mode),
+ fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 843d4a7bcd6e..cf086b01c6c6 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
int work_flags;
unsigned long flags;
- if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+ /*
+ * If io-wq is exiting for this task, or if the request has explicitly
+ * been marked as one that should not get executed, cancel it here.
+ */
+ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+ (work->flags & IO_WQ_WORK_CANCEL)) {
io_run_cancel(work, wqe);
return;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0cac361bf6b8..5a0fd6bcd318 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1294,6 +1294,17 @@ static void io_queue_async_work(struct io_kiocb *req)
/* init ->work of the whole link before punting */
io_prep_async_link(req);
+
+ /*
+ * Not expected to happen, but if we do have a bug where this _can_
+ * happen, catch it here and ensure the request is marked as
+ * canceled. That will make io-wq go through the usual work cancel
+ * procedure rather than attempt to run this request (or create a new
+ * worker for it).
+ */
+ if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+ req->work.flags |= IO_WQ_WORK_CANCEL;
+
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
&req->work, req->flags);
io_wq_enqueue(tctx->io_wq, &req->work);
@@ -2205,7 +2216,7 @@ static inline bool io_run_task_work(void)
* Find and free completed poll iocbs
*/
static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
- struct list_head *done)
+ struct list_head *done, bool resubmit)
{
struct req_batch rb;
struct io_kiocb *req;
@@ -2220,7 +2231,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, inflight_entry);
list_del(&req->inflight_entry);
- if (READ_ONCE(req->result) == -EAGAIN &&
+ if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
!(req->flags & REQ_F_DONT_REISSUE)) {
req->iopoll_completed = 0;
req_ref_get(req);
@@ -2244,7 +2255,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
- long min)
+ long min, bool resubmit)
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
@@ -2287,7 +2298,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
if (!list_empty(&done))
- io_iopoll_complete(ctx, nr_events, &done);
+ io_iopoll_complete(ctx, nr_events, &done, resubmit);
return ret;
}
@@ -2305,7 +2316,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
- io_do_iopoll(ctx, &nr_events, 0);
+ io_do_iopoll(ctx, &nr_events, 0, false);
/* let it sleep and repeat later if can't complete a request */
if (nr_events == 0)
@@ -2367,7 +2378,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
list_empty(&ctx->iopoll_list))
break;
}
- ret = io_do_iopoll(ctx, &nr_events, min);
+ ret = io_do_iopoll(ctx, &nr_events, min, true);
} while (!ret && nr_events < min && !need_resched());
out:
mutex_unlock(&ctx->uring_lock);
@@ -4802,6 +4813,7 @@ IO_NETOP_FN(recv);
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
+ int nr_entries;
int error;
};
@@ -4995,11 +5007,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
struct io_kiocb *req = pt->req;
/*
- * If poll->head is already set, it's because the file being polled
- * uses multiple waitqueues for poll handling (eg one for read, one
- * for write). Setup a separate io_poll_iocb if this happens.
+ * The file being polled uses multiple waitqueues for poll handling
+ * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+ * if this happens.
*/
- if (unlikely(poll->head)) {
+ if (unlikely(pt->nr_entries)) {
struct io_poll_iocb *poll_one = poll;
/* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5039,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
*poll_ptr = poll;
}
- pt->error = 0;
+ pt->nr_entries++;
poll->head = head;
if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5116,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
ipt->pt._key = mask;
ipt->req = req;
- ipt->error = -EINVAL;
+ ipt->error = 0;
+ ipt->nr_entries = 0;
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+ if (unlikely(!ipt->nr_entries) && !ipt->error)
+ ipt->error = -EINVAL;
spin_lock_irq(&ctx->completion_lock);
+ if (ipt->error)
+ io_poll_remove_double(req);
if (likely(poll->head)) {
spin_lock(&poll->head->lock);
if (unlikely(list_empty(&poll->wait.entry))) {
@@ -6792,7 +6809,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->iopoll_list))
- io_do_iopoll(ctx, &nr_events, 0);
+ io_do_iopoll(ctx, &nr_events, 0, true);
/*
* Don't submit if refs are dying, good for io_uring_register(),
@@ -7899,15 +7916,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
struct io_wq_data data;
unsigned int concurrency;
+ mutex_lock(&ctx->uring_lock);
hash = ctx->hash_map;
if (!hash) {
hash = kzalloc(sizeof(*hash), GFP_KERNEL);
- if (!hash)
+ if (!hash) {
+ mutex_unlock(&ctx->uring_lock);
return ERR_PTR(-ENOMEM);
+ }
refcount_set(&hash->refs, 1);
init_waitqueue_head(&hash->wait);
ctx->hash_map = hash;
}
+ mutex_unlock(&ctx->uring_lock);
data.hash = hash;
data.task = task;
@@ -7981,9 +8002,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
f = fdget(p->wq_fd);
if (!f.file)
return -ENXIO;
- fdput(f);
- if (f.file->f_op != &io_uring_fops)
+ if (f.file->f_op != &io_uring_fops) {
+ fdput(f);
return -EINVAL;
+ }
+ fdput(f);
}
if (ctx->flags & IORING_SETUP_SQPOLL) {
struct task_struct *tsk;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index b117b212ef28..4a2cda04d3e2 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -32,6 +32,9 @@ static void seq_set_overflow(struct seq_file *m)
static void *seq_buf_alloc(unsigned long size)
{
+ if (unlikely(size > MAX_RW_COUNT))
+ return NULL;
+
return kvmalloc(size, GFP_KERNEL_ACCOUNT);
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f6e0f0c0d0e5..5c2d806e6ae5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
}
static __always_inline int validate_range(struct mm_struct *mm,
- __u64 *start, __u64 len)
+ __u64 start, __u64 len)
{
__u64 task_size = mm->task_size;
- *start = untagged_addr(*start);
-
- if (*start & ~PAGE_MASK)
+ if (start & ~PAGE_MASK)
return -EINVAL;
if (len & ~PAGE_MASK)
return -EINVAL;
if (!len)
return -EINVAL;
- if (*start < mmap_min_addr)
+ if (start < mmap_min_addr)
return -EINVAL;
- if (*start >= task_size)
+ if (start >= task_size)
return -EINVAL;
- if (len > task_size - *start)
+ if (len > task_size - start)
return -EINVAL;
return 0;
}
@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vm_flags |= VM_UFFD_MINOR;
}
- ret = validate_range(mm, &uffdio_register.range.start,
+ ret = validate_range(mm, uffdio_register.range.start,
uffdio_register.range.len);
if (ret)
goto out;
@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
goto out;
- ret = validate_range(mm, &uffdio_unregister.start,
+ ret = validate_range(mm, uffdio_unregister.start,
uffdio_unregister.len);
if (ret)
goto out;
@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
+ ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
if (ret)
goto out;
@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
sizeof(uffdio_copy)-sizeof(__s64)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
+ ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
if (ret)
goto out;
/*
@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
sizeof(uffdio_zeropage)-sizeof(__s64)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
+ ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
uffdio_zeropage.range.len);
if (ret)
goto out;
@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
sizeof(struct uffdio_writeprotect)))
return -EFAULT;
- ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+ ret = validate_range(ctx->mm, uffdio_wp.range.start,
uffdio_wp.range.len);
if (ret)
return ret;
@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
sizeof(uffdio_continue) - (sizeof(__s64))))
goto out;
- ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+ ret = validate_range(ctx->mm, uffdio_continue.range.start,
uffdio_continue.range.len);
if (ret)
goto out;