summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/extent_io.c9
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/erofs/zdata.c4
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/ext4/mballoc.c10
-rw-r--r--fs/ext4/orphan.c6
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/compress.c25
-rw-r--r--fs/f2fs/data.c11
-rw-r--r--fs/f2fs/f2fs.h4
-rw-r--r--fs/f2fs/file.c49
-rw-r--r--fs/f2fs/gc.c16
-rw-r--r--fs/f2fs/super.c10
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/file.c23
-rw-r--r--fs/gfs2/glock.c115
-rw-r--r--fs/gfs2/glock.h4
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/lock_dlm.c72
-rw-r--r--fs/gfs2/trace_gfs2.h1
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h8
-rw-r--r--fs/hfsplus/unicode.c24
-rw-r--r--fs/hfsplus/xattr.c6
-rw-r--r--fs/nfs/localio.c65
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfsd/filecache.c34
-rw-r--r--fs/nfsd/filecache.h4
-rw-r--r--fs/nfsd/localio.c11
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--fs/notify/fanotify/fanotify_user.c3
-rw-r--r--fs/ntfs3/index.c10
-rw-r--r--fs/ntfs3/run.c12
-rw-r--r--fs/ocfs2/stack_user.c1
-rw-r--r--fs/smb/client/smb2ops.c17
-rw-r--r--fs/smb/client/smbdirect.c110
-rw-r--r--fs/smb/client/smbdirect.h4
-rw-r--r--fs/smb/server/ksmbd_netlink.h5
-rw-r--r--fs/smb/server/mgmt/user_session.c26
-rw-r--r--fs/smb/server/server.h1
-rw-r--r--fs/smb/server/smb2pdu.c3
-rw-r--r--fs/smb/server/transport_ipc.c3
-rw-r--r--fs/smb/server/transport_rdma.c99
-rw-r--r--fs/smb/server/transport_tcp.c27
-rw-r--r--fs/squashfs/inode.c7
-rw-r--r--fs/squashfs/squashfs_fs_i.h2
-rw-r--r--fs/udf/inode.c3
51 files changed, 668 insertions, 237 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b21cb72835cc..4eafe3817e11 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1621,7 +1621,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
- bool error = false;
+ int found_error = 0;
const u64 folio_start = folio_pos(folio);
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
u64 cur;
@@ -1685,7 +1685,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
*/
btrfs_mark_ordered_io_finished(inode, folio, cur,
fs_info->sectorsize, false);
- error = true;
+ if (!found_error)
+ found_error = ret;
continue;
}
submitted_io = true;
@@ -1702,11 +1703,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* If we hit any error, the corresponding sector will have its dirty
* flag cleared and writeback finished, thus no need to handle the error case.
*/
- if (!submitted_io && !error) {
+ if (!submitted_io && !found_error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
- return ret;
+ return found_error;
}
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 18db1053cdf0..cd8a09e3d1dc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6479,6 +6479,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
if (!args->subvol)
btrfs_inherit_iflags(BTRFS_I(inode), BTRFS_I(dir));
+ btrfs_set_inode_mapping_order(BTRFS_I(inode));
if (S_ISREG(inode->i_mode)) {
if (btrfs_test_opt(fs_info, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
@@ -6486,7 +6487,6 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
btrfs_update_inode_mapping_flags(BTRFS_I(inode));
- btrfs_set_inode_mapping_order(BTRFS_I(inode));
}
ret = btrfs_insert_inode_locked(inode);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index b002e9b734f9..56c8005b24a3 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -412,7 +412,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
vm_fault_t vmf;
unsigned long off = i * PAGE_SIZE;
vmf = vmf_insert_mixed(vma, vma->vm_start + off,
- address + off);
+ PHYS_PFN(address + off));
if (vmf & VM_FAULT_ERROR)
ret = vm_fault_to_errno(vmf, 0);
}
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 2d73297003d2..625b8ae8f67f 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1835,7 +1835,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
map->m_la = end;
err = z_erofs_map_blocks_iter(inode, map,
EROFS_GET_BLOCKS_READMORE);
- if (err)
+ if (err || !(map->m_flags & EROFS_MAP_ENCODED))
return;
/* expand ra for the trailing edge if readahead */
@@ -1847,7 +1847,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
end = round_up(end, PAGE_SIZE);
} else {
end = round_up(map->m_la, PAGE_SIZE);
- if (!map->m_llen)
+ if (!(map->m_flags & EROFS_MAP_ENCODED) || !map->m_llen)
return;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 01a6e2de7fc3..72e02df72c4c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1982,6 +1982,16 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
+ * Check whether the inode is tracked as orphan (either in orphan file or
+ * orphan list).
+ */
+static inline bool ext4_inode_orphan_tracked(struct inode *inode)
+{
+ return ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
+ !list_empty(&EXT4_I(inode)->i_orphan);
+}
+
+/*
* Codes for operating systems
*/
#define EXT4_OS_LINUX 0
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 93240e35ee36..7a8b30932189 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -354,7 +354,7 @@ static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
* to cleanup the orphan list in ext4_handle_inode_extension(). Do it
* now.
*/
- if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+ if (ext4_inode_orphan_tracked(inode) && inode->i_nlink) {
handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b7a15db4953..5230452e29dd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4748,7 +4748,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
* old inodes get re-used with the upper 16 bits of the
* uid/gid intact.
*/
- if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ if (ei->i_dtime && !ext4_inode_orphan_tracked(inode)) {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
} else {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5898d92ba19f..6070d3c86678 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3655,16 +3655,26 @@ static void ext4_discard_work(struct work_struct *work)
static inline void ext4_mb_avg_fragment_size_destroy(struct ext4_sb_info *sbi)
{
+ if (!sbi->s_mb_avg_fragment_size)
+ return;
+
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_avg_fragment_size[i]);
+
kfree(sbi->s_mb_avg_fragment_size);
+ sbi->s_mb_avg_fragment_size = NULL;
}
static inline void ext4_mb_largest_free_orders_destroy(struct ext4_sb_info *sbi)
{
+ if (!sbi->s_mb_largest_free_orders)
+ return;
+
for (int i = 0; i < MB_NUM_ORDERS(sbi->s_sb); i++)
xa_destroy(&sbi->s_mb_largest_free_orders[i]);
+
kfree(sbi->s_mb_largest_free_orders);
+ sbi->s_mb_largest_free_orders = NULL;
}
int ext4_mb_init(struct super_block *sb)
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 524d4658fa40..0fbcce67ffd4 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -109,11 +109,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
- /*
- * Inode orphaned in orphan file or in orphan list?
- */
- if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
- !list_empty(&EXT4_I(inode)->i_orphan))
+ if (ext4_inode_orphan_tracked(inode))
return 0;
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 699c15db28a8..ba497387b9c8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1438,9 +1438,9 @@ static void ext4_free_in_core_inode(struct inode *inode)
static void ext4_destroy_inode(struct inode *inode)
{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+ if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
+ "Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 5c1f47e45dab..72bc05b913af 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1245,20 +1245,29 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
for (i = cluster_size - 1; i >= 0; i--) {
struct folio *folio = page_folio(rpages[i]);
- loff_t start = folio->index << PAGE_SHIFT;
+ loff_t start = (loff_t)folio->index << PAGE_SHIFT;
+ loff_t offset = from > start ? from - start : 0;
- if (from <= start) {
- folio_zero_segment(folio, 0, folio_size(folio));
- } else {
- folio_zero_segment(folio, from - start,
- folio_size(folio));
+ folio_zero_segment(folio, offset, folio_size(folio));
+
+ if (from >= start)
break;
- }
}
f2fs_compress_write_end(inode, fsdata, start_idx, true);
+
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ round_down(from, cluster_size << PAGE_SHIFT),
+ LLONG_MAX);
+ if (err)
+ return err;
+
+ truncate_pagecache(inode, from);
+
+ err = f2fs_do_truncate_blocks(inode,
+ round_up(from, PAGE_SIZE), lock);
}
- return 0;
+ return err;
}
static int f2fs_write_compressed_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7961e0ddfca3..50c90bd03923 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -911,7 +911,7 @@ alloc_new:
if (fio->io_wbc)
wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio));
- inc_page_count(fio->sbi, WB_DATA_TYPE(data_folio, false));
+ inc_page_count(fio->sbi, WB_DATA_TYPE(folio, false));
*fio->last_block = fio->new_blkaddr;
*fio->bio = bio;
@@ -1778,12 +1778,13 @@ sync_out:
if (map->m_flags & F2FS_MAP_MAPPED) {
unsigned int ofs = start_pgofs - map->m_lblk;
- f2fs_update_read_extent_cache_range(&dn,
- start_pgofs, map->m_pblk + ofs,
- map->m_len - ofs);
+ if (map->m_len > ofs)
+ f2fs_update_read_extent_cache_range(&dn,
+ start_pgofs, map->m_pblk + ofs,
+ map->m_len - ofs);
}
if (map->m_next_extent)
- *map->m_next_extent = pgofs + 1;
+ *map->m_next_extent = is_hole ? pgofs + 1 : pgofs;
}
f2fs_put_dnode(&dn);
unlock_out:
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b4b62ac46bc6..dac7d44885e4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2361,8 +2361,6 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
{
if (!inode)
return true;
- if (!test_opt(sbi, RESERVE_ROOT))
- return false;
if (IS_NOQUOTA(inode))
return true;
if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid()))
@@ -2383,7 +2381,7 @@ static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
- if (!__allow_reserved_blocks(sbi, inode, cap))
+ if (test_opt(sbi, RESERVE_ROOT) && !__allow_reserved_blocks(sbi, inode, cap))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 42faaed6a02d..ffa045b39c01 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,15 +35,23 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
-static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+static void f2fs_zero_post_eof_page(struct inode *inode,
+ loff_t new_size, bool lock)
{
loff_t old_size = i_size_read(inode);
if (old_size >= new_size)
return;
+ if (mapping_empty(inode->i_mapping))
+ return;
+
+ if (lock)
+ filemap_invalidate_lock(inode->i_mapping);
/* zero or drop pages only in range of [old_size, new_size] */
- truncate_pagecache(inode, old_size);
+ truncate_inode_pages_range(inode->i_mapping, old_size, new_size);
+ if (lock)
+ filemap_invalidate_unlock(inode->i_mapping);
}
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
@@ -114,9 +122,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT, true);
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
@@ -904,8 +910,16 @@ int f2fs_truncate(struct inode *inode)
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
- if (err)
+ if (err) {
+ /*
+ * Always truncate page #0 to avoid page cache
+ * leak in evict() path.
+ */
+ truncate_inode_pages_range(inode->i_mapping,
+ F2FS_BLK_TO_BYTES(0),
+ F2FS_BLK_END_BYTES(0));
return err;
+ }
}
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
@@ -1141,7 +1155,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
filemap_invalidate_lock(inode->i_mapping);
if (attr->ia_size > old_size)
- f2fs_zero_post_eof_page(inode, attr->ia_size);
+ f2fs_zero_post_eof_page(inode, attr->ia_size, false);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1260,9 +1274,7 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1547,7 +1559,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
@@ -1670,9 +1682,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1806,7 +1816,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
+ f2fs_zero_post_eof_page(inode, offset + len, false);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1864,9 +1874,7 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, offset + len);
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len, true);
f2fs_balance_fs(sbi, true);
@@ -4914,9 +4922,8 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
if (err)
return err;
- filemap_invalidate_lock(inode->i_mapping);
- f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
- filemap_invalidate_unlock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode,
+ iocb->ki_pos + iov_iter_count(from), true);
return count;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index c0f209f74688..5734e0386468 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1794,6 +1794,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi),
GET_SUM_BLOCK(sbi, segno));
+ if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno))) {
+ f2fs_err(sbi, "%s: segment %u is used by log",
+ __func__, segno);
+ f2fs_bug_on(sbi, 1);
+ goto skip;
+ }
+
if (get_valid_blocks(sbi, segno, false) == 0)
goto freed;
if (gc_type == BG_GC && __is_large_section(sbi) &&
@@ -1805,7 +1812,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
sum = folio_address(sum_folio);
if (type != GET_SUM_TYPE((&sum->footer))) {
- f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA",
segno, type, GET_SUM_TYPE((&sum->footer)));
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_SUMMARY);
@@ -2068,6 +2075,13 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
+ /*
+ * avoid migrating empty section, as it can be allocated by
+ * log in parallel.
+ */
+ if (!get_valid_blocks(sbi, segno, true))
+ continue;
+
if (is_cursec(sbi, GET_SEC_FROM_SEG(sbi, segno)))
continue;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e16c4e2830c2..8086a3456e4d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -988,6 +988,10 @@ static int f2fs_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx_set_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_enable:
+ F2FS_CTX_INFO(ctx).unusable_cap_perc = 0;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap_perc;
+ F2FS_CTX_INFO(ctx).unusable_cap = 0;
+ ctx->spec_mask |= F2FS_SPEC_checkpoint_disable_cap;
ctx_clear_opt(ctx, F2FS_MOUNT_DISABLE_CHECKPOINT);
break;
default:
@@ -1185,7 +1189,11 @@ static int f2fs_check_quota_consistency(struct fs_context *fc,
goto err_jquota_change;
if (old_qname) {
- if (strcmp(old_qname, new_qname) == 0) {
+ if (!new_qname) {
+ f2fs_info(sbi, "remove qf_name %s",
+ old_qname);
+ continue;
+ } else if (strcmp(old_qname, new_qname) == 0) {
ctx->qname_mask &= ~(1 << i);
continue;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4adcf09d4b01..c7351ca07065 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1175,7 +1175,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
num = min(iov_iter_count(ii), fc->max_write);
ap->args.in_pages = true;
- ap->descs[0].offset = offset;
while (num && ap->num_folios < max_folios) {
size_t tmp;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 72d95185a39f..bc67fa058c84 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1442,6 +1442,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ int ret;
if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
@@ -1450,14 +1451,20 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
locks_lock_file_wait(file, fl);
return -EIO;
}
- if (cmd == F_CANCELLK)
- return dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl);
- else if (IS_GETLK(cmd))
- return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
- else if (lock_is_unlock(fl))
- return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
- else
- return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
+ down_read(&ls->ls_sem);
+ ret = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ if (cmd == F_CANCELLK)
+ ret = dlm_posix_cancel(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else if (IS_GETLK(cmd))
+ ret = dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else if (lock_is_unlock(fl))
+ ret = dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
+ else
+ ret = dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
+ }
+ up_read(&ls->ls_sem);
+ return ret;
}
static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b6fd1cb17de7..a6535413a0b4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -502,7 +502,7 @@ static bool do_promote(struct gfs2_glock *gl)
*/
if (list_is_first(&gh->gh_list, &gl->gl_holders))
return false;
- do_error(gl, 0);
+ do_error(gl, 0); /* Fail queued try locks */
break;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
@@ -703,44 +703,25 @@ __acquires(&gl->gl_lockref.lock)
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
GLOCK_BUG_ON(gl, gl->gl_state == target);
GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
- if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
- glops->go_inval) {
- /*
- * If another process is already doing the invalidate, let that
- * finish first. The glock state machine will get back to this
- * holder again later.
- */
- if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
- &gl->gl_flags))
- return;
- do_error(gl, 0); /* Fail queued try locks */
- }
- gl->gl_req = target;
- set_bit(GLF_BLOCKING, &gl->gl_flags);
- if ((gl->gl_req == LM_ST_UNLOCKED) ||
- (gl->gl_state == LM_ST_EXCLUSIVE) ||
- (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
- clear_bit(GLF_BLOCKING, &gl->gl_flags);
- if (!glops->go_inval && !glops->go_sync)
+ if (!glops->go_inval || !glops->go_sync)
goto skip_inval;
spin_unlock(&gl->gl_lockref.lock);
- if (glops->go_sync) {
- ret = glops->go_sync(gl);
- /* If we had a problem syncing (due to io errors or whatever,
- * we should not invalidate the metadata or tell dlm to
- * release the glock to other nodes.
- */
- if (ret) {
- if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
- fs_err(sdp, "Error %d syncing glock \n", ret);
- gfs2_dump_glock(NULL, gl, true);
- }
- spin_lock(&gl->gl_lockref.lock);
- goto skip_inval;
+ ret = glops->go_sync(gl);
+ /* If we had a problem syncing (due to io errors or whatever,
+ * we should not invalidate the metadata or tell dlm to
+ * release the glock to other nodes.
+ */
+ if (ret) {
+ if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
+ fs_err(sdp, "Error %d syncing glock\n", ret);
+ gfs2_dump_glock(NULL, gl, true);
}
+ spin_lock(&gl->gl_lockref.lock);
+ goto skip_inval;
}
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
+
+ if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) {
/*
* The call to go_sync should have cleared out the ail list.
* If there are still items, we have a problem. We ought to
@@ -755,7 +736,6 @@ __acquires(&gl->gl_lockref.lock)
gfs2_dump_glock(NULL, gl, true);
}
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
}
spin_lock(&gl->gl_lockref.lock);
@@ -805,8 +785,6 @@ skip_inval:
clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
return;
- } else {
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
}
}
@@ -816,21 +794,22 @@ skip_inval:
ret = ls->ls_ops->lm_lock(gl, target, lck_flags);
spin_lock(&gl->gl_lockref.lock);
- if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
- target == LM_ST_UNLOCKED &&
- test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ if (!ret) {
+ /* The operation will be completed asynchronously. */
+ return;
+ }
+ clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
+
+ if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED &&
+ target == LM_ST_UNLOCKED) {
/*
* The lockspace has been released and the lock has
* been unlocked implicitly.
*/
- } else if (ret) {
+ } else {
fs_err(sdp, "lm_lock ret %d\n", ret);
target = gl->gl_state | LM_OUT_ERROR;
- } else {
- /* The operation will be completed asynchronously. */
- return;
}
- clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
}
/* Complete the operation now. */
@@ -1462,6 +1441,24 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
va_end(args);
}
+static bool gfs2_should_queue_trylock(struct gfs2_glock *gl,
+ struct gfs2_holder *gh)
+{
+ struct gfs2_holder *current_gh, *gh2;
+
+ current_gh = find_first_holder(gl);
+ if (current_gh && !may_grant(gl, current_gh, gh))
+ return false;
+
+ list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
+ if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
+ continue;
+ if (!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
+ return false;
+ }
+ return true;
+}
+
static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
{
if (!(gh->gh_flags & GL_NOPID))
@@ -1480,27 +1477,20 @@ static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
*/
static inline void add_to_queue(struct gfs2_holder *gh)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_holder *gh2;
- int try_futile = 0;
GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
GLOCK_BUG_ON(gl, true);
- if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
- if (test_bit(GLF_LOCK, &gl->gl_flags)) {
- struct gfs2_holder *current_gh;
-
- current_gh = find_first_holder(gl);
- try_futile = !may_grant(gl, current_gh, gh);
- }
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
- goto fail;
+ if ((gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
+ !gfs2_should_queue_trylock(gl, gh)) {
+ gh->gh_error = GLR_TRYFAILED;
+ gfs2_holder_wake(gh);
+ return;
}
list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
@@ -1512,15 +1502,6 @@ __acquires(&gl->gl_lockref.lock)
continue;
goto trap_recursive;
}
- list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
- if (try_futile &&
- !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
-fail:
- gh->gh_error = GLR_TRYFAILED;
- gfs2_holder_wake(gh);
- return;
- }
- }
trace_gfs2_glock_queue(gh, 1);
gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
@@ -2321,8 +2302,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'y';
if (test_bit(GLF_LFLUSH, gflags))
*p++ = 'f';
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
- *p++ = 'i';
if (test_bit(GLF_PENDING_REPLY, gflags))
*p++ = 'R';
if (test_bit(GLF_HAVE_REPLY, gflags))
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 9339a3bff6ee..d041b922b45e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -68,6 +68,10 @@ enum {
* also be granted in SHARED. The preferred state is whichever is compatible
* with other granted locks, or the specified state if no other locks exist.
*
+ * In addition, when a lock is already held in EX mode locally, a SHARED or
+ * DEFERRED mode request with the LM_FLAG_ANY flag set will be granted.
+ * (The LM_FLAG_ANY flag is only use for SHARED mode requests currently.)
+ *
* LM_FLAG_NODE_SCOPE
* This holder agrees to share the lock within this node. In other words,
* the glock is held in EX mode according to DLM, but local holders on the
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d4ad82f47eee..3fcb7ab198d4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -319,7 +319,6 @@ enum {
GLF_DEMOTE_IN_PROGRESS = 5,
GLF_DIRTY = 6,
GLF_LFLUSH = 7,
- GLF_INVALIDATE_IN_PROGRESS = 8,
GLF_HAVE_REPLY = 9,
GLF_INITIAL = 10,
GLF_HAVE_FROZEN_REPLY = 11,
@@ -658,6 +657,8 @@ struct lm_lockstruct {
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
char *ls_lvb_bits;
+ struct rw_semaphore ls_sem;
+
spinlock_t ls_recover_spin; /* protects following fields */
unsigned long ls_recover_flags; /* DFL_ */
uint32_t ls_recover_mount; /* gen in first recover_done cb */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index cee5d199d2d8..6db37c20587d 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -58,6 +58,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
/**
* gfs2_update_reply_times - Update locking statistics
* @gl: The glock to update
+ * @blocking: The operation may have been blocking
*
* This assumes that gl->gl_dstamp has been set earlier.
*
@@ -72,12 +73,12 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
* TRY_1CB flags are set are classified as non-blocking. All
* other DLM requests are counted as (potentially) blocking.
*/
-static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
+static inline void gfs2_update_reply_times(struct gfs2_glock *gl,
+ bool blocking)
{
struct gfs2_pcpu_lkstats *lks;
const unsigned gltype = gl->gl_name.ln_type;
- unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
- GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
+ unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
s64 rtt;
preempt_disable();
@@ -119,14 +120,18 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
+ bool blocking;
unsigned ret;
+ blocking = test_bit(GLF_BLOCKING, &gl->gl_flags);
+ gfs2_update_reply_times(gl, blocking);
+ clear_bit(GLF_BLOCKING, &gl->gl_flags);
+
/* If the glock is dead, we only react to a dlm_unlock() reply. */
if (__lockref_is_dead(&gl->gl_lockref) &&
gl->gl_lksb.sb_status != -DLM_EUNLOCK)
return;
- gfs2_update_reply_times(gl);
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
@@ -241,7 +246,7 @@ static bool down_conversion(int cur, int req)
}
static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
- const int cur, const int req)
+ const int req, bool blocking)
{
u32 lkf = 0;
@@ -274,7 +279,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
* "upward" lock conversions or else DLM will reject the
* request as invalid.
*/
- if (!down_conversion(cur, req))
+ if (blocking)
lkf |= DLM_LKF_QUECVT;
}
@@ -294,14 +299,20 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags)
{
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
+ bool blocking;
int cur, req;
u32 lkf;
char strname[GDLM_STRNAME_BYTES] = "";
int error;
+ gl->gl_req = req_state;
cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state);
req = make_mode(gl->gl_name.ln_sbd, req_state);
- lkf = make_flags(gl, flags, cur, req);
+ blocking = !down_conversion(cur, req) &&
+ !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB));
+ lkf = make_flags(gl, flags, req, blocking);
+ if (blocking)
+ set_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
@@ -318,8 +329,13 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
*/
again:
- error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
- GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ }
+ up_read(&ls->ls_sem);
if (error == -EBUSY) {
msleep(20);
goto again;
@@ -341,7 +357,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
return;
}
- clear_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
@@ -369,8 +384,13 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
flags |= DLM_LKF_VALBLK;
again:
- error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
- NULL, gl);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
+ NULL, gl);
+ }
+ up_read(&ls->ls_sem);
if (error == -EBUSY) {
msleep(20);
goto again;
@@ -386,7 +406,12 @@ again:
static void gdlm_cancel(struct gfs2_glock *gl)
{
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
- dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+
+ down_read(&ls->ls_sem);
+ if (likely(ls->ls_dlm != NULL)) {
+ dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+ }
+ up_read(&ls->ls_sem);
}
/*
@@ -567,7 +592,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
- error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL))
+ error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x error %d\n",
name, lksb->sb_lkid, error);
@@ -594,9 +623,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
memset(strname, 0, GDLM_STRNAME_BYTES);
snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
- error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
- strname, GDLM_STRNAME_BYTES - 1,
- 0, sync_wait_cb, ls, NULL);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+ strname, GDLM_STRNAME_BYTES - 1,
+ 0, sync_wait_cb, ls, NULL);
+ }
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
name, lksb->sb_lkid, flags, mode, error);
@@ -1323,6 +1357,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
*/
INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+ ls->ls_dlm = NULL;
spin_lock_init(&ls->ls_recover_spin);
ls->ls_recover_flags = 0;
ls->ls_recover_mount = 0;
@@ -1357,6 +1392,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
* create/join lockspace
*/
+ init_rwsem(&ls->ls_sem);
error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
&gdlm_lockspace_ops, sdp, &ops_result,
&ls->ls_dlm);
@@ -1436,10 +1472,12 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
/* mounted_lock and control_lock will be purged in dlm recovery */
release:
+ down_write(&ls->ls_sem);
if (ls->ls_dlm) {
dlm_release_lockspace(ls->ls_dlm, 2);
ls->ls_dlm = NULL;
}
+ up_write(&ls->ls_sem);
free_recover_size(ls);
}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 26036ffc3f33..1c2507a27318 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -52,7 +52,6 @@
{(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
{(1UL << GLF_DIRTY), "y" }, \
{(1UL << GLF_LFLUSH), "f" }, \
- {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
{(1UL << GLF_PENDING_REPLY), "R" }, \
{(1UL << GLF_HAVE_REPLY), "r" }, \
{(1UL << GLF_INITIAL), "a" }, \
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 876bbb80fb4d..1b3e27a0d5e0 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -204,7 +204,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
fd.entrylength);
type = be16_to_cpu(entry.type);
len = NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN;
- err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
+ err = hfsplus_uni2asc_str(sb, &fd.key->cat.name, strbuf, &len);
if (err)
goto out;
if (type == HFSPLUS_FOLDER) {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 96a5c24813dd..2311e4be4e86 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -521,8 +521,12 @@ int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
const struct hfsplus_unistr *s2);
int hfsplus_strcmp(const struct hfsplus_unistr *s1,
const struct hfsplus_unistr *s2);
-int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr,
- char *astr, int *len_p);
+int hfsplus_uni2asc_str(struct super_block *sb,
+ const struct hfsplus_unistr *ustr, char *astr,
+ int *len_p);
+int hfsplus_uni2asc_xattr_str(struct super_block *sb,
+ const struct hfsplus_attr_unistr *ustr,
+ char *astr, int *len_p);
int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
int max_unistr_len, const char *astr, int len);
int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 36b6cf2a3abb..862ba27f1628 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -119,9 +119,8 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
return NULL;
}
-int hfsplus_uni2asc(struct super_block *sb,
- const struct hfsplus_unistr *ustr,
- char *astr, int *len_p)
+static int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr,
+ int max_len, char *astr, int *len_p)
{
const hfsplus_unichr *ip;
struct nls_table *nls = HFSPLUS_SB(sb)->nls;
@@ -134,8 +133,8 @@ int hfsplus_uni2asc(struct super_block *sb,
ip = ustr->unicode;
ustrlen = be16_to_cpu(ustr->length);
- if (ustrlen > HFSPLUS_MAX_STRLEN) {
- ustrlen = HFSPLUS_MAX_STRLEN;
+ if (ustrlen > max_len) {
+ ustrlen = max_len;
pr_err("invalid length %u has been corrected to %d\n",
be16_to_cpu(ustr->length), ustrlen);
}
@@ -256,6 +255,21 @@ out:
return res;
}
+inline int hfsplus_uni2asc_str(struct super_block *sb,
+ const struct hfsplus_unistr *ustr, char *astr,
+ int *len_p)
+{
+ return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
+}
+
+inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
+ const struct hfsplus_attr_unistr *ustr,
+ char *astr, int *len_p)
+{
+ return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
+ HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
+}
+
/*
* Convert one or more ASCII characters into a single unicode character.
* Returns the number of ASCII characters corresponding to the unicode char.
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 18dc3d254d21..c951fa9835aa 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -735,9 +735,9 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
goto end_listxattr;
xattr_name_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN;
- if (hfsplus_uni2asc(inode->i_sb,
- (const struct hfsplus_unistr *)&fd.key->attr.key_name,
- strbuf, &xattr_name_len)) {
+ if (hfsplus_uni2asc_xattr_str(inode->i_sb,
+ &fd.key->attr.key_name, strbuf,
+ &xattr_name_len)) {
pr_err("unicode conversion failed\n");
res = -EIO;
goto end_listxattr;
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 97abf62f109d..31ce210f032a 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -49,11 +49,6 @@ struct nfs_local_fsync_ctx {
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
-static bool localio_O_DIRECT_semantics __read_mostly = false;
-module_param(localio_O_DIRECT_semantics, bool, 0644);
-MODULE_PARM_DESC(localio_O_DIRECT_semantics,
- "LOCALIO will use O_DIRECT semantics to filesystem.");
-
static inline bool nfs_client_is_local(const struct nfs_client *clp)
{
return !!rcu_access_pointer(clp->cl_uuid.net);
@@ -321,12 +316,9 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
return NULL;
}
- if (localio_O_DIRECT_semantics &&
- test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
- iocb->kiocb.ki_filp = file;
+ init_sync_kiocb(&iocb->kiocb, file);
+ if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags))
iocb->kiocb.ki_flags = IOCB_DIRECT;
- } else
- init_sync_kiocb(&iocb->kiocb, file);
iocb->kiocb.ki_pos = hdr->args.offset;
iocb->hdr = hdr;
@@ -336,6 +328,30 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
return iocb;
}
+static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
+ loff_t offset, unsigned int addr_mask, unsigned int len_mask)
+{
+ const struct bio_vec *bvec = i->bvec;
+ size_t skip = i->iov_offset;
+ size_t size = i->count;
+
+ if ((offset | size) & len_mask)
+ return false;
+ do {
+ size_t len = bvec->bv_len;
+
+ if (len > size)
+ len = size;
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
+ return false;
+ bvec++;
+ size -= len;
+ skip = 0;
+ } while (size);
+
+ return true;
+}
+
static void
nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
{
@@ -345,6 +361,25 @@ nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
hdr->args.count + hdr->args.pgbase);
if (hdr->args.pgbase != 0)
iov_iter_advance(i, hdr->args.pgbase);
+
+ if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
+ u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
+ /* Verify the IO is DIO-aligned as required */
+ nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
+ &nf_dio_offset_align,
+ &nf_dio_read_offset_align);
+ if (dir == READ)
+ nf_dio_offset_align = nf_dio_read_offset_align;
+
+ if (nf_dio_mem_align && nf_dio_offset_align &&
+ nfs_iov_iter_aligned_bvec(i, hdr->args.offset,
+ nf_dio_mem_align - 1,
+ nf_dio_offset_align - 1))
+ return; /* is DIO-aligned */
+
+ /* Fallback to using buffered for this misaligned IO */
+ iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
+ }
}
static void
@@ -405,6 +440,11 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
struct nfs_pgio_header *hdr = iocb->hdr;
struct file *filp = iocb->kiocb.ki_filp;
+ if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
+ /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+ pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
+ }
+
nfs_local_pgio_done(hdr, status);
/*
@@ -597,6 +637,11 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
+ if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
+ /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
+ pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
+ }
+
/* Handle short writes as if they are ENOSPC */
if (status > 0 && status < hdr->args.count) {
hdr->mds_offset += status;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ce61253efd45..611e6283c194 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9442,7 +9442,7 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args
goto out;
if (rcvd->max_rqst_sz > sent->max_rqst_sz)
return -EINVAL;
- if (rcvd->max_resp_sz < sent->max_resp_sz)
+ if (rcvd->max_resp_sz > sent->max_resp_sz)
return -EINVAL;
if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
return -EINVAL;
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 732abf6b92a5..7ca1dedf4e04 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -231,6 +231,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
refcount_set(&nf->nf_ref, 1);
nf->nf_may = need;
nf->nf_mark = NULL;
+ nf->nf_dio_mem_align = 0;
+ nf->nf_dio_offset_align = 0;
+ nf->nf_dio_read_offset_align = 0;
return nf;
}
@@ -1070,6 +1073,35 @@ nfsd_file_is_cached(struct inode *inode)
}
static __be32
+nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf)
+{
+ struct inode *inode = file_inode(nf->nf_file);
+ struct kstat stat;
+ __be32 status;
+
+ /* Currently only need to get DIO alignment info for regular files */
+ if (!S_ISREG(inode->i_mode))
+ return nfs_ok;
+
+ status = fh_getattr(fhp, &stat);
+ if (status != nfs_ok)
+ return status;
+
+ trace_nfsd_file_get_dio_attrs(inode, &stat);
+
+ if (stat.result_mask & STATX_DIOALIGN) {
+ nf->nf_dio_mem_align = stat.dio_mem_align;
+ nf->nf_dio_offset_align = stat.dio_offset_align;
+ }
+ if (stat.result_mask & STATX_DIO_READ_ALIGN)
+ nf->nf_dio_read_offset_align = stat.dio_read_offset_align;
+ else
+ nf->nf_dio_read_offset_align = nf->nf_dio_offset_align;
+
+ return nfs_ok;
+}
+
+static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
struct svc_cred *cred,
struct auth_domain *client,
@@ -1187,6 +1219,8 @@ open_file:
}
status = nfserrno(ret);
trace_nfsd_file_open(nf, status);
+ if (status == nfs_ok)
+ status = nfsd_file_get_dio_attrs(fhp, nf);
}
} else
status = nfserr_jukebox;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 722b26c71e45..237a05c74211 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -54,6 +54,10 @@ struct nfsd_file {
struct list_head nf_gc;
struct rcu_head nf_rcu;
ktime_t nf_birthtime;
+
+ u32 nf_dio_mem_align;
+ u32 nf_dio_offset_align;
+ u32 nf_dio_read_offset_align;
};
int nfsd_file_cache_init(void);
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index cb237f1b902a..9e0a37cd29d8 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -117,6 +117,16 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
return localio;
}
+static void nfsd_file_dio_alignment(struct nfsd_file *nf,
+ u32 *nf_dio_mem_align,
+ u32 *nf_dio_offset_align,
+ u32 *nf_dio_read_offset_align)
+{
+ *nf_dio_mem_align = nf->nf_dio_mem_align;
+ *nf_dio_offset_align = nf->nf_dio_offset_align;
+ *nf_dio_read_offset_align = nf->nf_dio_read_offset_align;
+}
+
static const struct nfsd_localio_operations nfsd_localio_ops = {
.nfsd_net_try_get = nfsd_net_try_get,
.nfsd_net_put = nfsd_net_put,
@@ -124,6 +134,7 @@ static const struct nfsd_localio_operations nfsd_localio_ops = {
.nfsd_file_put_local = nfsd_file_put_local,
.nfsd_file_get_local = nfsd_file_get_local,
.nfsd_file_file = nfsd_file_file,
+ .nfsd_file_dio_alignment = nfsd_file_dio_alignment,
};
void nfsd_localio_ops_init(void)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a664fdf1161e..6e2c8e2aab10 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1133,6 +1133,33 @@ TRACE_EVENT(nfsd_file_alloc,
)
);
+TRACE_EVENT(nfsd_file_get_dio_attrs,
+ TP_PROTO(
+ const struct inode *inode,
+ const struct kstat *stat
+ ),
+ TP_ARGS(inode, stat),
+ TP_STRUCT__entry(
+ __field(const void *, inode)
+ __field(unsigned long, mask)
+ __field(u32, mem_align)
+ __field(u32, offset_align)
+ __field(u32, read_offset_align)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->mask = stat->result_mask;
+ __entry->mem_align = stat->dio_mem_align;
+ __entry->offset_align = stat->dio_offset_align;
+ __entry->read_offset_align = stat->dio_read_offset_align;
+ ),
+ TP_printk("inode=%p flags=%s mem_align=%u offset_align=%u read_offset_align=%u",
+ __entry->inode, show_statx_mask(__entry->mask),
+ __entry->mem_align, __entry->offset_align,
+ __entry->read_offset_align
+ )
+);
+
TRACE_EVENT(nfsd_file_acquire,
TP_PROTO(
const struct svc_rqst *rqstp,
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index eff04959606f..fde3e0c11dba 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -185,6 +185,10 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
u32 request_mask = STATX_BASIC_STATS;
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
+ struct inode *inode = d_inode(p.dentry);
+
+ if (S_ISREG(inode->i_mode))
+ request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN);
if (fh->fh_maxsize == NFS4_FHSIZE)
request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index b192ee068a7a..561339b4cf75 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1999,7 +1999,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
user_ns = path.mnt->mnt_sb->s_user_ns;
obj = path.mnt->mnt_sb;
} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
+ ret = -EINVAL;
mntns = mnt_ns_from_dentry(path.dentry);
+ if (!mntns)
+ goto path_put_and_out;
user_ns = mntns->user_ns;
obj = mntns;
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 1bf2a6593dec..6d1bf890929d 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1508,6 +1508,16 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size);
}
+ /*
+ * Index blocks exist, but $BITMAP has zero valid bits.
+ * This implies an on-disk corruption and must be rejected.
+ */
+ if (in->name == I30_NAME &&
+ unlikely(bmp_size_v == 0 && indx->alloc_run.count)) {
+ err = -EINVAL;
+ goto out1;
+ }
+
bit = bmp_size << 3;
}
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index 6e86d66197ef..88550085f745 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -9,6 +9,7 @@
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/log2.h>
+#include <linux/overflow.h>
#include "debug.h"
#include "ntfs.h"
@@ -982,14 +983,18 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
if (!dlcn)
return -EINVAL;
- lcn = prev_lcn + dlcn;
+
+ if (check_add_overflow(prev_lcn, dlcn, &lcn))
+ return -EINVAL;
prev_lcn = lcn;
} else {
/* The size of 'dlcn' can't be > 8. */
return -EINVAL;
}
- next_vcn = vcn64 + len;
+ if (check_add_overflow(vcn64, len, &next_vcn))
+ return -EINVAL;
+
/* Check boundary. */
if (next_vcn > evcn + 1)
return -EINVAL;
@@ -1153,7 +1158,8 @@ int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn)
return -EINVAL;
run_buf += size_size + offset_size;
- vcn64 += len;
+ if (check_add_overflow(vcn64, len, &vcn64))
+ return -EINVAL;
#ifndef CONFIG_NTFS3_64BIT_CLUSTER
if (vcn64 > 0x100000000ull)
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 0f045e45fa0c..439742cec3c2 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1011,6 +1011,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
printk(KERN_ERR "ocfs2: Could not determine"
" locking version\n");
user_cluster_disconnect(conn);
+ lc = NULL;
goto out;
}
wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index e586f3f4b5c9..68286673afc9 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4219,7 +4219,7 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst,
int num_rqst, const u8 *sig, u8 **iv,
struct aead_request **req, struct sg_table *sgt,
- unsigned int *num_sgs, size_t *sensitive_size)
+ unsigned int *num_sgs)
{
unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
unsigned int iv_size = crypto_aead_ivsize(tfm);
@@ -4236,9 +4236,8 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst
len += req_size;
len = ALIGN(len, __alignof__(struct scatterlist));
len += array_size(*num_sgs, sizeof(struct scatterlist));
- *sensitive_size = len;
- p = kvzalloc(len, GFP_NOFS);
+ p = kzalloc(len, GFP_NOFS);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -4252,16 +4251,14 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst
static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst,
int num_rqst, const u8 *sig, u8 **iv,
- struct aead_request **req, struct scatterlist **sgl,
- size_t *sensitive_size)
+ struct aead_request **req, struct scatterlist **sgl)
{
struct sg_table sgtable = {};
unsigned int skip, num_sgs, i, j;
ssize_t rc;
void *p;
- p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable,
- &num_sgs, sensitive_size);
+ p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable, &num_sgs);
if (IS_ERR(p))
return ERR_CAST(p);
@@ -4350,7 +4347,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
DECLARE_CRYPTO_WAIT(wait);
unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
void *creq;
- size_t sensitive_size;
rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key);
if (rc) {
@@ -4376,8 +4372,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
return rc;
}
- creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg,
- &sensitive_size);
+ creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg);
if (IS_ERR(creq))
return PTR_ERR(creq);
@@ -4407,7 +4402,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
if (!rc && enc)
memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
- kvfree_sensitive(creq, sensitive_size);
+ kfree_sensitive(creq);
return rc;
}
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index e0fce5033004..6480945c2459 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -179,6 +179,8 @@ static int smbd_conn_upcall(
struct smbd_connection *info = id->context;
struct smbdirect_socket *sc = &info->socket;
const char *event_name = rdma_event_msg(event->event);
+ u8 peer_initiator_depth;
+ u8 peer_responder_resources;
log_rdma_event(INFO, "event=%s status=%d\n",
event_name, event->status);
@@ -204,6 +206,85 @@ static int smbd_conn_upcall(
case RDMA_CM_EVENT_ESTABLISHED:
log_rdma_event(INFO, "connected event=%s\n", event_name);
+
+ /*
+ * Here we work around an inconsistency between
+ * iWarp and other devices (at least rxe and irdma using RoCEv2)
+ */
+ if (rdma_protocol_iwarp(id->device, id->port_num)) {
+ /*
+ * iWarp devices report the peer's values
+ * with the perspective of the peer here.
+ * Tested with siw and irdma (in iwarp mode)
+ * We need to change to our perspective here,
+ * so we need to switch the values.
+ */
+ peer_initiator_depth = event->param.conn.responder_resources;
+ peer_responder_resources = event->param.conn.initiator_depth;
+ } else {
+ /*
+ * Non iWarp devices report the peer's values
+ * already changed to our perspective here.
+ * Tested with rxe and irdma (in roce mode).
+ */
+ peer_initiator_depth = event->param.conn.initiator_depth;
+ peer_responder_resources = event->param.conn.responder_resources;
+ }
+ if (rdma_protocol_iwarp(id->device, id->port_num) &&
+ event->param.conn.private_data_len == 8) {
+ /*
+ * Legacy clients with only iWarp MPA v1 support
+ * need a private blob in order to negotiate
+ * the IRD/ORD values.
+ */
+ const __be32 *ird_ord_hdr = event->param.conn.private_data;
+ u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
+ u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);
+
+ /*
+ * cifs.ko sends the legacy IRD/ORD negotiation
+ * event if iWarp MPA v2 was used.
+ *
+ * Here we check that the values match and only
+ * mark the client as legacy if they don't match.
+ */
+ if ((u32)event->param.conn.initiator_depth != ird32 ||
+ (u32)event->param.conn.responder_resources != ord32) {
+ /*
+ * There are broken clients (old cifs.ko)
+ * using little endian and also
+ * struct rdma_conn_param only uses u8
+ * for initiator_depth and responder_resources,
+ * so we truncate the value to U8_MAX.
+ *
+ * smb_direct_accept_client() will then
+ * do the real negotiation in order to
+ * select the minimum between client and
+ * server.
+ */
+ ird32 = min_t(u32, ird32, U8_MAX);
+ ord32 = min_t(u32, ord32, U8_MAX);
+
+ info->legacy_iwarp = true;
+ peer_initiator_depth = (u8)ird32;
+ peer_responder_resources = (u8)ord32;
+ }
+ }
+
+ /*
+ * negotiate the value by using the minimum
+ * between client and server if the client provided
+ * non 0 values.
+ */
+ if (peer_initiator_depth != 0)
+ info->initiator_depth =
+ min_t(u8, info->initiator_depth,
+ peer_initiator_depth);
+ if (peer_responder_resources != 0)
+ info->responder_resources =
+ min_t(u8, info->responder_resources,
+ peer_responder_resources);
+
sc->status = SMBDIRECT_SOCKET_CONNECTED;
wake_up_interruptible(&info->status_wait);
break;
@@ -1551,7 +1632,7 @@ static struct smbd_connection *_smbd_get_connection(
struct ib_qp_init_attr qp_attr;
struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
struct ib_port_immutable port_immutable;
- u32 ird_ord_hdr[2];
+ __be32 ird_ord_hdr[2];
info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
if (!info)
@@ -1559,6 +1640,9 @@ static struct smbd_connection *_smbd_get_connection(
sc = &info->socket;
sp = &sc->parameters;
+ info->initiator_depth = 1;
+ info->responder_resources = SMBD_CM_RESPONDER_RESOURCES;
+
sc->status = SMBDIRECT_SOCKET_CONNECTING;
rc = smbd_ia_open(info, dstaddr, port);
if (rc) {
@@ -1639,22 +1723,22 @@ static struct smbd_connection *_smbd_get_connection(
}
sc->ib.qp = sc->rdma.cm_id->qp;
- memset(&conn_param, 0, sizeof(conn_param));
- conn_param.initiator_depth = 0;
-
- conn_param.responder_resources =
- min(sc->ib.dev->attrs.max_qp_rd_atom,
- SMBD_CM_RESPONDER_RESOURCES);
- info->responder_resources = conn_param.responder_resources;
+ info->responder_resources =
+ min_t(u8, info->responder_resources,
+ sc->ib.dev->attrs.max_qp_rd_atom);
log_rdma_mr(INFO, "responder_resources=%d\n",
info->responder_resources);
+ memset(&conn_param, 0, sizeof(conn_param));
+ conn_param.initiator_depth = info->initiator_depth;
+ conn_param.responder_resources = info->responder_resources;
+
/* Need to send IRD/ORD in private data for iWARP */
sc->ib.dev->ops.get_port_immutable(
sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable);
if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
- ird_ord_hdr[0] = info->responder_resources;
- ird_ord_hdr[1] = 1;
+ ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
+ ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
conn_param.private_data = ird_ord_hdr;
conn_param.private_data_len = sizeof(ird_ord_hdr);
} else {
@@ -2121,6 +2205,12 @@ static int allocate_mr_list(struct smbd_connection *info)
atomic_set(&info->mr_used_count, 0);
init_waitqueue_head(&info->wait_for_mr_cleanup);
INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
+
+ if (info->responder_resources == 0) {
+ log_rdma_mr(ERR, "responder_resources negotiated as 0\n");
+ return -EINVAL;
+ }
+
/* Allocate more MRs (2x) than hardware responder_resources */
for (i = 0; i < info->responder_resources * 2; i++) {
smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h
index e45aa9ddd71d..4ca9b2b2c57f 100644
--- a/fs/smb/client/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
@@ -67,7 +67,9 @@ struct smbd_connection {
/* Memory registrations */
/* Maximum number of RDMA read/write outstanding on this connection */
- int responder_resources;
+ bool legacy_iwarp;
+ u8 initiator_depth;
+ u8 responder_resources;
/* Maximum number of pages in a single RDMA write/read on this connection */
int max_frmr_depth;
/*
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 3f07a612c05b..8ccd57fd904b 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -112,10 +112,11 @@ struct ksmbd_startup_request {
__u32 smbd_max_io_size; /* smbd read write size */
__u32 max_connections; /* Number of maximum simultaneous connections */
__s8 bind_interfaces_only;
- __s8 reserved[503]; /* Reserved room */
+ __u32 max_ip_connections; /* Number of maximum connection per ip address */
+ __s8 reserved[499]; /* Reserved room */
__u32 ifc_list_sz; /* interfaces list size */
__s8 ____payload[];
-};
+} __packed;
#define KSMBD_STARTUP_CONFIG_INTERFACES(s) ((s)->____payload)
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 9dec4c2940bc..b36d0676dbe5 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -104,29 +104,32 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
if (!entry)
return -ENOMEM;
- down_read(&sess->rpc_lock);
entry->method = method;
entry->id = id = ksmbd_ipc_id_alloc();
if (id < 0)
goto free_entry;
+
+ down_write(&sess->rpc_lock);
old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP);
- if (xa_is_err(old))
+ if (xa_is_err(old)) {
+ up_write(&sess->rpc_lock);
goto free_id;
+ }
resp = ksmbd_rpc_open(sess, id);
- if (!resp)
- goto erase_xa;
+ if (!resp) {
+ xa_erase(&sess->rpc_handle_list, entry->id);
+ up_write(&sess->rpc_lock);
+ goto free_id;
+ }
- up_read(&sess->rpc_lock);
+ up_write(&sess->rpc_lock);
kvfree(resp);
return id;
-erase_xa:
- xa_erase(&sess->rpc_handle_list, entry->id);
free_id:
ksmbd_rpc_id_free(entry->id);
free_entry:
kfree(entry);
- up_read(&sess->rpc_lock);
return -EINVAL;
}
@@ -144,9 +147,14 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
{
struct ksmbd_session_rpc *entry;
+ int method;
+ down_read(&sess->rpc_lock);
entry = xa_load(&sess->rpc_handle_list, id);
- return entry ? entry->method : 0;
+ method = entry ? entry->method : 0;
+ up_read(&sess->rpc_lock);
+
+ return method;
}
void ksmbd_session_destroy(struct ksmbd_session *sess)
diff --git a/fs/smb/server/server.h b/fs/smb/server/server.h
index 995555febe7d..b8a7317be86b 100644
--- a/fs/smb/server/server.h
+++ b/fs/smb/server/server.h
@@ -43,6 +43,7 @@ struct ksmbd_server_config {
unsigned int auth_mechs;
unsigned int max_connections;
unsigned int max_inflight_req;
+ unsigned int max_ip_connections;
char *conf[SERVER_CONF_WORK_GROUP + 1];
struct task_struct *dh_task;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index a565fc36cee6..a1db006ab6e9 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5628,7 +5628,8 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
if (!work->tcon->posix_extensions) {
pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
- rc = -EOPNOTSUPP;
+ path_put(&path);
+ return -EOPNOTSUPP;
} else {
info = (struct filesystem_posix_info *)(rsp->Buffer);
info->OptimalTransferSize = cpu_to_le32(stfs.f_bsize);
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 2a3e2b0ce557..2aa1b29bea08 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -335,6 +335,9 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req)
if (req->max_connections)
server_conf.max_connections = req->max_connections;
+ if (req->max_ip_connections)
+ server_conf.max_ip_connections = req->max_ip_connections;
+
ret = ksmbd_set_netbios_name(req->netbios_name);
ret |= ksmbd_set_server_string(req->server_string);
ret |= ksmbd_set_work_group(req->work_group);
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 74dfb6496095..e1f659d3b4cf 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -153,6 +153,10 @@ struct smb_direct_transport {
struct work_struct disconnect_work;
bool negotiation_requested;
+
+ bool legacy_iwarp;
+ u8 initiator_depth;
+ u8 responder_resources;
};
#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
@@ -347,6 +351,9 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
t->cm_id = cm_id;
cm_id->context = t;
+ t->initiator_depth = SMB_DIRECT_CM_INITIATOR_DEPTH;
+ t->responder_resources = 1;
+
t->status = SMB_DIRECT_CS_NEW;
init_waitqueue_head(&t->wait_status);
@@ -1676,21 +1683,21 @@ static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
static int smb_direct_accept_client(struct smb_direct_transport *t)
{
struct rdma_conn_param conn_param;
- struct ib_port_immutable port_immutable;
- u32 ird_ord_hdr[2];
+ __be32 ird_ord_hdr[2];
int ret;
+ /*
+ * smb_direct_handle_connect_request()
+ * already negotiated t->initiator_depth
+ * and t->responder_resources
+ */
memset(&conn_param, 0, sizeof(conn_param));
- conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
- SMB_DIRECT_CM_INITIATOR_DEPTH);
- conn_param.responder_resources = 0;
-
- t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
- t->cm_id->port_num,
- &port_immutable);
- if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
- ird_ord_hdr[0] = conn_param.responder_resources;
- ird_ord_hdr[1] = 1;
+ conn_param.initiator_depth = t->initiator_depth;
+ conn_param.responder_resources = t->responder_resources;
+
+ if (t->legacy_iwarp) {
+ ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources);
+ ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth);
conn_param.private_data = ird_ord_hdr;
conn_param.private_data_len = sizeof(ird_ord_hdr);
} else {
@@ -2081,10 +2088,13 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
return true;
}
-static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
+static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id,
+ struct rdma_cm_event *event)
{
struct smb_direct_transport *t;
struct task_struct *handler;
+ u8 peer_initiator_depth;
+ u8 peer_responder_resources;
int ret;
if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
@@ -2098,6 +2108,67 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
if (!t)
return -ENOMEM;
+ peer_initiator_depth = event->param.conn.initiator_depth;
+ peer_responder_resources = event->param.conn.responder_resources;
+ if (rdma_protocol_iwarp(new_cm_id->device, new_cm_id->port_num) &&
+ event->param.conn.private_data_len == 8) {
+ /*
+ * Legacy clients with only iWarp MPA v1 support
+ * need a private blob in order to negotiate
+ * the IRD/ORD values.
+ */
+ const __be32 *ird_ord_hdr = event->param.conn.private_data;
+ u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
+ u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);
+
+ /*
+ * cifs.ko sends the legacy IRD/ORD negotiation
+ * event if iWarp MPA v2 was used.
+ *
+ * Here we check that the values match and only
+ * mark the client as legacy if they don't match.
+ */
+ if ((u32)event->param.conn.initiator_depth != ird32 ||
+ (u32)event->param.conn.responder_resources != ord32) {
+ /*
+ * There are broken clients (old cifs.ko)
+ * using little endian and also
+ * struct rdma_conn_param only uses u8
+ * for initiator_depth and responder_resources,
+ * so we truncate the value to U8_MAX.
+ *
+ * smb_direct_accept_client() will then
+ * do the real negotiation in order to
+ * select the minimum between client and
+ * server.
+ */
+ ird32 = min_t(u32, ird32, U8_MAX);
+ ord32 = min_t(u32, ord32, U8_MAX);
+
+ t->legacy_iwarp = true;
+ peer_initiator_depth = (u8)ird32;
+ peer_responder_resources = (u8)ord32;
+ }
+ }
+
+ /*
+ * First set what the we as server are able to support
+ */
+ t->initiator_depth = min_t(u8, t->initiator_depth,
+ new_cm_id->device->attrs.max_qp_rd_atom);
+
+ /*
+ * negotiate the value by using the minimum
+ * between client and server if the client provided
+ * non 0 values.
+ */
+ if (peer_initiator_depth != 0)
+ t->initiator_depth = min_t(u8, t->initiator_depth,
+ peer_initiator_depth);
+ if (peer_responder_resources != 0)
+ t->responder_resources = min_t(u8, t->responder_resources,
+ peer_responder_resources);
+
ret = smb_direct_connect(t);
if (ret)
goto out_err;
@@ -2122,7 +2193,7 @@ static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
{
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST: {
- int ret = smb_direct_handle_connect_request(cm_id);
+ int ret = smb_direct_handle_connect_request(cm_id, event);
if (ret) {
pr_err("Can't create transport: %d\n", ret);
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index 4337df97987d..1009cb324fd5 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -238,6 +238,7 @@ static int ksmbd_kthread_fn(void *p)
struct interface *iface = (struct interface *)p;
struct ksmbd_conn *conn;
int ret;
+ unsigned int max_ip_conns;
while (!kthread_should_stop()) {
mutex_lock(&iface->sock_release_lock);
@@ -255,34 +256,38 @@ static int ksmbd_kthread_fn(void *p)
continue;
}
+ if (!server_conf.max_ip_connections)
+ goto skip_max_ip_conns_limit;
+
/*
* Limits repeated connections from clients with the same IP.
*/
+ max_ip_conns = 0;
down_read(&conn_list_lock);
- list_for_each_entry(conn, &conn_list, conns_list)
+ list_for_each_entry(conn, &conn_list, conns_list) {
#if IS_ENABLED(CONFIG_IPV6)
if (client_sk->sk->sk_family == AF_INET6) {
if (memcmp(&client_sk->sk->sk_v6_daddr,
- &conn->inet6_addr, 16) == 0) {
- ret = -EAGAIN;
- break;
- }
+ &conn->inet6_addr, 16) == 0)
+ max_ip_conns++;
} else if (inet_sk(client_sk->sk)->inet_daddr ==
- conn->inet_addr) {
- ret = -EAGAIN;
- break;
- }
+ conn->inet_addr)
+ max_ip_conns++;
#else
if (inet_sk(client_sk->sk)->inet_daddr ==
- conn->inet_addr) {
+ conn->inet_addr)
+ max_ip_conns++;
+#endif
+ if (server_conf.max_ip_connections <= max_ip_conns) {
ret = -EAGAIN;
break;
}
-#endif
+ }
up_read(&conn_list_lock);
if (ret == -EAGAIN)
continue;
+skip_max_ip_conns_limit:
if (server_conf.max_connections &&
atomic_inc_return(&active_num_conn) >= server_conf.max_connections) {
pr_info_ratelimited("Limit the maximum number of connections(%u)\n",
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index d5918eba27e3..53104f25de51 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -165,6 +165,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -212,6 +213,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -292,6 +294,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
if (type == SQUASHFS_LSYMLINK_TYPE) {
__le32 xattr;
@@ -329,6 +332,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -353,6 +357,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -373,6 +378,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFSOCK;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
case SQUASHFS_LFIFO_TYPE:
@@ -392,6 +398,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_op = &squashfs_inode_ops;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
default:
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index 2c82d6f2a456..8e497ac07b9a 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -16,6 +16,7 @@ struct squashfs_inode_info {
u64 xattr;
unsigned int xattr_size;
int xattr_count;
+ int parent;
union {
struct {
u64 fragment_block;
@@ -27,7 +28,6 @@ struct squashfs_inode_info {
u64 dir_idx_start;
int dir_idx_offset;
int dir_idx_cnt;
- int parent;
};
};
struct inode vfs_inode;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f24aa98e6869..a79d73f28aa7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -2272,6 +2272,9 @@ int udf_current_aext(struct inode *inode, struct extent_position *epos,
if (check_add_overflow(sizeof(struct allocExtDesc),
le32_to_cpu(header->lengthAllocDescs), &alen))
return -1;
+
+ if (alen > epos->bh->b_size)
+ return -1;
}
switch (iinfo->i_alloc_type) {