Diffstat (limited to 'fs')
44 files changed, 360 insertions, 297 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index d8b90f95b157..726592868e9c 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -287,7 +287,7 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) if (btrfs_op(bio) == BTRFS_MAP_WRITE) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - if (!(bio->bi_opf & REQ_RAHEAD)) + else if (!(bio->bi_opf & REQ_RAHEAD)) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); if (bio->bi_opf & REQ_PREFLUSH) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5b10401d803b..0ef8b8926bfa 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -558,14 +558,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group, int index, int max_index, - struct btrfs_key *key) + struct btrfs_key *found_key) { struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *extent_root; - int ret = 0; u64 search_offset; u64 search_end = block_group->start + block_group->length; struct btrfs_path *path; + struct btrfs_key search_key; + int ret = 0; ASSERT(index >= 0); ASSERT(index <= max_index); @@ -585,37 +586,24 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ path->reada = READA_FORWARD; search_offset = index * div_u64(block_group->length, max_index); - key->objectid = block_group->start + search_offset; - key->type = BTRFS_EXTENT_ITEM_KEY; - key->offset = 0; + search_key.objectid = block_group->start + search_offset; + search_key.type = BTRFS_EXTENT_ITEM_KEY; + search_key.offset = 0; - while (1) { - ret = btrfs_search_forward(extent_root, key, path, 0); - if (ret != 0) - goto out; + btrfs_for_each_slot(extent_root, &search_key, found_key, path, ret) { /* Success; sampled an extent item in the block group */ - if (key->type == BTRFS_EXTENT_ITEM_KEY && - key->objectid >= block_group->start && - key->objectid + key->offset <= search_end) - goto out; + if (found_key->type == BTRFS_EXTENT_ITEM_KEY && + found_key->objectid >= block_group->start && + found_key->objectid + found_key->offset <= search_end) + break; /* We can't possibly find a valid extent item anymore */ - if (key->objectid >= search_end) { + if (found_key->objectid >= search_end) { ret = 1; break; } - if (key->type < BTRFS_EXTENT_ITEM_KEY) - key->type = BTRFS_EXTENT_ITEM_KEY; - else - key->objectid++; - btrfs_release_path(path); - up_read(&fs_info->commit_root_sem); - mutex_unlock(&caching_ctl->mutex); - cond_resched(); - mutex_lock(&caching_ctl->mutex); - down_read(&fs_info->commit_root_sem); } -out: + lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); btrfs_free_path(path); @@ -659,6 +647,7 @@ out: static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl, struct btrfs_block_group *block_group) { + struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_key key; int i; u64 min_size = block_group->length; @@ -668,6 +657,8 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl if (!btrfs_block_group_should_use_size_class(block_group)) return 0; + lockdep_assert_held(&caching_ctl->mutex); + lockdep_assert_held_read(&fs_info->commit_root_sem); for (i = 0; i < 5; ++i) { ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key); if (ret < 0) @@ -682,7 +673,6 @@ static 
int load_block_group_size_class(struct btrfs_caching_control *caching_ctl block_group->size_class = size_class; spin_unlock(&block_group->lock); } - out: return ret; } @@ -1836,7 +1826,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used %llu%% unusable", - bg->start, div_u64(bg->used * 100, bg->length), + bg->start, + div64_u64(bg->used * 100, bg->length), div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); ret = btrfs_relocate_chunk(fs_info, bg->start); @@ -2493,18 +2484,29 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans, struct btrfs_block_group_item bgi; struct btrfs_root *root = btrfs_block_group_root(fs_info); struct btrfs_key key; + u64 old_commit_used; + int ret; spin_lock(&block_group->lock); btrfs_set_stack_block_group_used(&bgi, block_group->used); btrfs_set_stack_block_group_chunk_objectid(&bgi, block_group->global_root_id); btrfs_set_stack_block_group_flags(&bgi, block_group->flags); + old_commit_used = block_group->commit_used; + block_group->commit_used = block_group->used; key.objectid = block_group->start; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; key.offset = block_group->length; spin_unlock(&block_group->lock); - return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); + ret = btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi)); + if (ret < 0) { + spin_lock(&block_group->lock); + block_group->commit_used = old_commit_used; + spin_unlock(&block_group->lock); + } + + return ret; } static int insert_dev_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0095c6e4c3d1..6b457b010cbc 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1048,7 +1048,7 @@ again: * so there is only one iref. The case that several irefs are * in the same item doesn't exist. */ - btrfs_del_item(trans, root, path); + ret = btrfs_del_item(trans, root, path); out: btrfs_release_delayed_iref(node); btrfs_release_path(path); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index be94030e1dfb..138afa955370 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -763,7 +763,13 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, goto next; } + flags = em->flags; clear_bit(EXTENT_FLAG_PINNED, &em->flags); + /* + * In case we split the extent map, we want to preserve the + * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want + * it on the new extent maps. 
+ */ clear_bit(EXTENT_FLAG_LOGGING, &flags); modified = !list_empty(&em->list); @@ -774,7 +780,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, if (em->start >= start && em_end <= end) goto remove_em; - flags = em->flags; gen = em->generation; compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 84626c8ad5bf..a0ef1a1784c7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2859,6 +2859,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, di_args->bytes_used = btrfs_device_get_bytes_used(dev); di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); + memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); if (dev->name) strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path)); else diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 8c5efa5813b3..37fc58a7f27e 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -9,6 +9,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/bug.h> +#include <linux/list.h> #include <crypto/hash.h> #include "messages.h" #include "ctree.h" @@ -778,6 +779,45 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj, return len; } +static ssize_t btrfs_size_classes_show(struct kobject *kobj, + struct kobj_attribute *a, char *buf) +{ + struct btrfs_space_info *sinfo = to_space_info(kobj); + struct btrfs_block_group *bg; + u32 none = 0; + u32 small = 0; + u32 medium = 0; + u32 large = 0; + + for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) { + down_read(&sinfo->groups_sem); + list_for_each_entry(bg, &sinfo->block_groups[i], list) { + if (!btrfs_block_group_should_use_size_class(bg)) + continue; + switch (bg->size_class) { + case BTRFS_BG_SZ_NONE: + none++; + break; + case BTRFS_BG_SZ_SMALL: + small++; + break; + case BTRFS_BG_SZ_MEDIUM: + medium++; + break; + case BTRFS_BG_SZ_LARGE: + large++; + break; + } + } + up_read(&sinfo->groups_sem); + } + return sysfs_emit(buf, "none %u\n" + "small %u\n" + "medium %u\n" + "large %u\n", + none, small, medium, large); +} + #ifdef CONFIG_BTRFS_DEBUG /* * Request chunk allocation with current chunk size. 
@@ -835,6 +875,7 @@ SPACE_INFO_ATTR(bytes_zone_unusable); SPACE_INFO_ATTR(disk_used); SPACE_INFO_ATTR(disk_total); BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store); +BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show); static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj, struct kobj_attribute *a, @@ -887,6 +928,7 @@ static struct attribute *space_info_attrs[] = { BTRFS_ATTR_PTR(space_info, disk_total), BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold), BTRFS_ATTR_PTR(space_info, chunk_size), + BTRFS_ATTR_PTR(space_info, size_classes), #ifdef CONFIG_BTRFS_DEBUG BTRFS_ATTR_PTR(space_info, force_chunk_alloc), #endif diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 1911f7016fa1..19a70a69c760 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -420,6 +420,11 @@ skip_rdma: from_kuid(&init_user_ns, ses->linux_uid), from_kuid(&init_user_ns, ses->cred_uid)); + if (ses->dfs_root_ses) { + seq_printf(m, "\n\tDFS root session id: 0x%llx", + ses->dfs_root_ses->Suid); + } + spin_lock(&ses->chan_lock); if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0)) seq_puts(m, "\tPrimary channel: DISCONNECTED "); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 2b1a8d55b4ec..cb40074feb3e 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -179,6 +179,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct path *path) tmp.source = full_path; tmp.leaf_fullpath = NULL; tmp.UNC = tmp.prepath = NULL; + tmp.dfs_root_ses = NULL; rc = smb3_fs_context_dup(ctx, &tmp); if (rc) { diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 013a4bd65280..651759192280 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -61,8 +61,6 @@ struct cifs_sb_info { /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ char *prepath; - /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */ - uuid_t dfs_mount_id; /* * Indicate whether serverino option was turned off later * (cifs_autodisable_serverino) in order to match new mounts. diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a99883f16d94..08a73dcb7786 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1233,6 +1233,7 @@ struct cifs_tcon { /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL struct list_head ulist; /* cache update list */ + struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ }; @@ -1749,9 +1750,8 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct cifs_ses *root_ses; - uuid_t mount_id; char *origin_fullpath, *leaf_fullpath; + struct list_head dfs_ses_list; }; static inline void free_dfs_info_param(struct dfs_info3_param *param) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5233f14f0636..0eceddde7140 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2229,6 +2229,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. 
*/ spin_lock(&cifs_tcp_ses_lock); + ses->dfs_root_ses = ctx->dfs_root_ses; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -3407,7 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - uuid_gen(&mnt_ctx.mount_id); + INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); + rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3427,7 +3429,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; - uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); @@ -3439,7 +3440,7 @@ out: return rc; error: - dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); + dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); kfree(mnt_ctx.origin_fullpath); kfree(mnt_ctx.leaf_fullpath); cifs_mount_put_conns(&mnt_ctx); @@ -3637,9 +3638,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); kfree(cifs_sb->prepath); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id); -#endif call_rcu(&cifs_sb->rcu, delayed_free); } diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b64d20374b9c..c8bda52fa096 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -95,25 +95,31 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) ctx->leaf_fullpath = (char *)full_path; rc = cifs_mount_get_session(mnt_ctx); ctx->leaf_fullpath = NULL; - if (!rc) { - struct cifs_ses *ses = mnt_ctx->ses; - mutex_lock(&ses->session_mutex); - ses->dfs_root_ses = mnt_ctx->root_ses; - mutex_unlock(&ses->session_mutex); - } return rc; } -static void set_root_ses(struct cifs_mount_ctx *mnt_ctx) +static int get_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { - if (mnt_ctx->ses) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct dfs_root_ses *root_ses; + struct cifs_ses *ses = mnt_ctx->ses; + + if (ses) { + root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); + if (!root_ses) + return -ENOMEM; + + INIT_LIST_HEAD(&root_ses->list); + spin_lock(&cifs_tcp_ses_lock); - mnt_ctx->ses->ses_count++; + ses->ses_count++; spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); + root_ses->ses = ses; + list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = ses; + return 0; } static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path, @@ -121,7 +127,8 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct dfs_info3_param ref = {}; - int rc; + bool is_refsrv = false; + int rc, rc2; rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref); if (rc) @@ -136,8 +143,7 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co if (rc) goto out; - if (ref.flags & DFSREF_REFERRAL_SERVER) - set_root_ses(mnt_ctx); + is_refsrv = !!(ref.flags & DFSREF_REFERRAL_SERVER); rc = -EREMOTE; if (ref.flags & DFSREF_STORAGE_SERVER) { @@ -146,13 +152,17 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co goto out; /* some servers may not advertise referral capability under ref.flags */ - if (!(ref.flags & DFSREF_REFERRAL_SERVER) && - is_tcon_dfs(mnt_ctx->tcon)) - set_root_ses(mnt_ctx); + is_refsrv |= is_tcon_dfs(mnt_ctx->tcon); rc = cifs_is_path_remote(mnt_ctx); } + if (rc == -EREMOTE && 
is_refsrv) { + rc2 = get_root_smb_session(mnt_ctx); + if (rc2) + rc = rc2; + } + out: free_dfs_info_param(&ref); return rc; @@ -165,6 +175,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) char *ref_path = NULL, *full_path = NULL; struct dfs_cache_tgt_iterator *tit; struct TCP_Server_Info *server; + struct cifs_tcon *tcon; char *origin_fullpath = NULL; int num_links = 0; int rc; @@ -234,12 +245,22 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!rc) { server = mnt_ctx->server; + tcon = mnt_ctx->tcon; mutex_lock(&server->refpath_lock); - server->origin_fullpath = origin_fullpath; - server->current_fullpath = server->leaf_fullpath; + if (!server->origin_fullpath) { + server->origin_fullpath = origin_fullpath; + server->current_fullpath = server->leaf_fullpath; + origin_fullpath = NULL; + } mutex_unlock(&server->refpath_lock); - origin_fullpath = NULL; + + if (list_empty(&tcon->dfs_ses_list)) { + list_replace_init(&mnt_ctx->dfs_ses_list, + &tcon->dfs_ses_list); + } else { + dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); + } } out: @@ -260,7 +281,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) rc = get_session(mnt_ctx, NULL); if (rc) return rc; - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. @@ -280,7 +301,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) } *isdfs = true; - set_root_ses(mnt_ctx); + rc = get_root_smb_session(mnt_ctx); + if (rc) + return rc; return __dfs_mount_share(mnt_ctx); } diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h index 344bea6d8bab..13f26e01f7b9 100644 --- a/fs/cifs/dfs.h +++ b/fs/cifs/dfs.h @@ -10,6 +10,11 @@ #include "fs_context.h" #include "cifs_unicode.h" +struct dfs_root_ses { + struct list_head list; + struct cifs_ses *ses; +}; + int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -22,9 +27,10 @@ static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path) static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } @@ -43,4 +49,15 @@ static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page) true); } +static inline void dfs_put_root_smb_sessions(struct list_head *head) +{ + struct dfs_root_ses *root, *tmp; + + list_for_each_entry_safe(root, tmp, head, list) { + list_del_init(&root->list); + cifs_put_smb_ses(root->ses); + kfree(root); + } +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index ac86bd0ebd63..1c59811bfa73 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -49,17 +49,6 @@ struct cache_entry { struct cache_dfs_tgt *tgthint; }; -/* List of referral server sessions per dfs mount */ -struct mount_group { - struct list_head list; - uuid_t id; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES]; - int num_sessions; - spinlock_t lock; - struct list_head refresh_list; - struct kref refcount; -}; - 
static struct kmem_cache *cache_slab __read_mostly; static struct workqueue_struct *dfscache_wq __read_mostly; @@ -76,85 +65,10 @@ static atomic_t cache_count; static struct hlist_head cache_htable[CACHE_HTABLE_SIZE]; static DECLARE_RWSEM(htable_rw_lock); -static LIST_HEAD(mount_group_list); -static DEFINE_MUTEX(mount_group_list_lock); - static void refresh_cache_worker(struct work_struct *work); static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker); -static void __mount_group_release(struct mount_group *mg) -{ - int i; - - for (i = 0; i < mg->num_sessions; i++) - cifs_put_smb_ses(mg->sessions[i]); - kfree(mg); -} - -static void mount_group_release(struct kref *kref) -{ - struct mount_group *mg = container_of(kref, struct mount_group, refcount); - - mutex_lock(&mount_group_list_lock); - list_del(&mg->list); - mutex_unlock(&mount_group_list_lock); - __mount_group_release(mg); -} - -static struct mount_group *find_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - list_for_each_entry(mg, &mount_group_list, list) { - if (uuid_equal(&mg->id, id)) - return mg; - } - return ERR_PTR(-ENOENT); -} - -static struct mount_group *__get_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - mg = find_mount_group_locked(id); - if (!IS_ERR(mg)) - return mg; - - mg = kmalloc(sizeof(*mg), GFP_KERNEL); - if (!mg) - return ERR_PTR(-ENOMEM); - kref_init(&mg->refcount); - uuid_copy(&mg->id, id); - mg->num_sessions = 0; - spin_lock_init(&mg->lock); - list_add(&mg->list, &mount_group_list); - return mg; -} - -static struct mount_group *get_mount_group(const uuid_t *id) -{ - struct mount_group *mg; - - mutex_lock(&mount_group_list_lock); - mg = __get_mount_group_locked(id); - if (!IS_ERR(mg)) - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - return mg; -} - -static void free_mount_group_list(void) -{ - struct mount_group *mg, *tmp_mg; - - list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) { - list_del_init(&mg->list); - __mount_group_release(mg); - } -} - /** * dfs_cache_canonical_path - get a canonical DFS path * @@ -704,7 +618,6 @@ void dfs_cache_destroy(void) { cancel_delayed_work_sync(&refresh_task); unload_nls(cache_cp); - free_mount_group_list(); flush_cache_ents(); kmem_cache_destroy(cache_slab); destroy_workqueue(dfscache_wq); @@ -1111,54 +1024,6 @@ out_unlock: return rc; } -/** - * dfs_cache_add_refsrv_session - add SMB session of referral server - * - * @mount_id: mount group uuid to lookup. - * @ses: reference counted SMB session of referral server. - */ -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses) -{ - struct mount_group *mg; - - if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses)) - return; - - mg = get_mount_group(mount_id); - if (WARN_ON_ONCE(IS_ERR(mg))) - return; - - spin_lock(&mg->lock); - if (mg->num_sessions < ARRAY_SIZE(mg->sessions)) - mg->sessions[mg->num_sessions++] = ses; - spin_unlock(&mg->lock); - kref_put(&mg->refcount, mount_group_release); -} - -/** - * dfs_cache_put_refsrv_sessions - put all referral server sessions - * - * Put all SMB sessions from the given mount group id. - * - * @mount_id: mount group uuid to lookup. 
- */ -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) -{ - struct mount_group *mg; - - if (!mount_id || uuid_is_null(mount_id)) - return; - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - return; - } - mutex_unlock(&mount_group_list_lock); - kref_put(&mg->refcount, mount_group_release); -} - /* Extract share from DFS target and return a pointer to prefix path or NULL */ static const char *parse_target_share(const char *target, char **share) { @@ -1384,11 +1249,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) cifs_dbg(FYI, "%s: not a dfs mount\n", __func__); return 0; } - - if (uuid_is_null(&cifs_sb->dfs_mount_id)) { - cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); - return -EINVAL; - } /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index be3b5a44cf82..e0d39393035a 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -40,8 +40,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter struct dfs_info3_param *ref); int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, char **prefix); -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 44cb5639ed3b..1b8d4e27f831 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -265,6 +265,7 @@ struct smb3_fs_context { bool rootfs:1; /* if it's a SMB root file system */ bool witness:1; /* use witness protocol */ char *leaf_fullpath; + struct cifs_ses *dfs_root_ses; }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a0d286ee723d..b44fb51968bf 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -22,6 +22,7 @@ #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" #include "dfs_cache.h" +#include "dfs.h" #endif #include "fs_context.h" #include "cached_dir.h" @@ -134,6 +135,9 @@ tconInfoAlloc(void) spin_lock_init(&ret_buf->stat_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_LIST_HEAD(&ret_buf->dfs_ses_list); +#endif return ret_buf; } @@ -149,6 +153,9 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_put_root_smb_sessions(&tcon->dfs_ses_list); +#endif kfree(tcon); } @@ -1255,6 +1262,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, * removing cached DFS targets that the client would eventually * need during failover. 
*/ + ses = CIFS_DFS_ROOT_SES(ses); if (ses->server->ops->get_dfs_refer && !ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs, &num_refs, cifs_sb->local_nls, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 9b956294e864..8dd3791b5c53 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -234,15 +234,32 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, size[0] = 8; /* sizeof __le64 */ data[0] = ptr; - rc = SMB2_set_info_init(tcon, server, - &rqst[num_rqst], COMPOUND_FID, - COMPOUND_FID, current->tgid, - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + if (cfile) { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + } else { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + COMPOUND_FID, + COMPOUND_FID, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + if (!rc) { + smb2_set_next_command(tcon, &rqst[num_rqst]); + smb2_set_related(&rqst[num_rqst]); + } + } if (rc) goto finished; - smb2_set_next_command(tcon, &rqst[num_rqst]); - smb2_set_related(&rqst[num_rqst++]); + num_rqst++; trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path); break; case SMB2_OP_SET_INFO: diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 381babc1212c..d827b7547ffa 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -425,7 +425,7 @@ generate_smb3signingkey(struct cifs_ses *ses, /* safe to access primary channel, since it will never go away */ spin_lock(&ses->chan_lock); - memcpy(ses->chans[0].signkey, ses->smb3signingkey, + memcpy(ses->chans[chan_index].signkey, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b42050c68e6c..24bdd5f4d3bc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -278,7 +278,7 @@ static int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) { - int rc = 0; + int rc; struct kvec *iov; int n_vec; unsigned int send_length = 0; @@ -289,6 +289,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct msghdr smb_msg = {}; __be32 rfc1002_marker; + cifs_in_send_inc(server); if (cifs_rdma_enabled(server)) { /* return -EAGAIN when connecting or reconnecting */ rc = -EAGAIN; @@ -297,14 +298,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, goto smbd_done; } + rc = -EAGAIN; if (ssocket == NULL) - return -EAGAIN; + goto out; + rc = -ERESTARTSYS; if (fatal_signal_pending(current)) { cifs_dbg(FYI, "signal pending before send request\n"); - return -ERESTARTSYS; + goto out; } + rc = 0; /* cork the socket */ tcp_sock_set_cork(ssocket->sk, true); @@ -407,7 +411,8 @@ smbd_done: rc); else if (rc > 0) rc = 0; - +out: + cifs_in_send_dec(server); return rc; } @@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, * I/O response may come back and free the mid entry on another thread. 
*/ cifs_save_when_sent(mid); - cifs_in_send_inc(server); rc = smb_send_rqst(server, 1, rqst, flags); - cifs_in_send_dec(server); if (rc < 0) { revert_current_mid(server, mid->credits); @@ -1144,9 +1147,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, else midQ[i]->callback = cifs_compound_last_callback; } - cifs_in_send_inc(server); rc = smb_send_rqst(server, num_rqst, rqst, flags); - cifs_in_send_dec(server); for (i = 0; i < num_rqst; i++) cifs_save_when_sent(midQ[i]); @@ -1396,9 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) @@ -1539,9 +1538,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, } midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index e16545849ea7..c08c0f578bc6 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -376,7 +376,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (bdev) blksize_mask = bdev_logical_block_size(bdev) - 1; else - blksize_mask = (1 << inode->i_blkbits) - 1; + blksize_mask = i_blocksize(inode) - 1; if ((iocb->ki_pos | iov_iter_count(to) | iov_iter_alignment(to)) & blksize_mask) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 091fd5adf818..d38e19c11270 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -47,7 +47,7 @@ void z_erofs_lzma_exit(void) } } -int z_erofs_lzma_init(void) +int __init z_erofs_lzma_init(void) { unsigned int i; @@ -278,7 +278,7 @@ again: } } if (no < nrpages_out && strm->buf.out) - kunmap(rq->in[no]); + kunmap(rq->out[no]); if (ni < nrpages_in) kunmap(rq->in[ni]); /* 4. 
push back LZMA stream context to the global list */ diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 3f3561d37d1b..1db018f8c2e8 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -486,7 +486,7 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) void *erofs_get_pcpubuf(unsigned int requiredpages); void erofs_put_pcpubuf(void *ptr); int erofs_pcpubuf_growsize(unsigned int nrpages); -void erofs_pcpubuf_init(void); +void __init erofs_pcpubuf_init(void); void erofs_pcpubuf_exit(void); int erofs_register_sysfs(struct super_block *sb); @@ -545,7 +545,7 @@ static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_ZIP_LZMA -int z_erofs_lzma_init(void); +int __init z_erofs_lzma_init(void); void z_erofs_lzma_exit(void); int z_erofs_load_lzma_config(struct super_block *sb, struct erofs_super_block *dsb, diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c index a2efd833d1b6..c7a4b1d77069 100644 --- a/fs/erofs/pcpubuf.c +++ b/fs/erofs/pcpubuf.c @@ -114,7 +114,7 @@ out: return ret; } -void erofs_pcpubuf_init(void) +void __init erofs_pcpubuf_init(void) { int cpu; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 3247d2422bea..f1708c77a991 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1312,12 +1312,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, if (!be->decompressed_pages) be->decompressed_pages = - kcalloc(be->nr_pages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kvcalloc(be->nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); if (!be->compressed_pages) be->compressed_pages = - kcalloc(pclusterpages, sizeof(struct page *), - GFP_KERNEL | __GFP_NOFAIL); + kvcalloc(pclusterpages, sizeof(struct page *), + GFP_KERNEL | __GFP_NOFAIL); z_erofs_parse_out_bvecs(be); err2 = z_erofs_parse_in_bvecs(be, &overlapped); @@ -1365,7 +1365,7 @@ out: } if (be->compressed_pages < be->onstack_pages || be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES) - kfree(be->compressed_pages); + kvfree(be->compressed_pages); z_erofs_fill_other_copies(be, err); for (i = 0; i < be->nr_pages; ++i) { @@ -1384,7 +1384,7 @@ out: } if (be->decompressed_pages != be->onstack_pages) - kfree(be->decompressed_pages); + kvfree(be->decompressed_pages); pcl->length = 0; pcl->partial = true; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8bf6d30518b6..655da4d739cb 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -757,9 +757,6 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, err = z_erofs_do_map_blocks(inode, map, flags); out: trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); - - /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ - DBG_BUGON(err < 0 && err != -ENOMEM); return err; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4eeb02d456a9..08b29c289da4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1387,7 +1387,7 @@ struct ext4_super_block { __le32 s_first_meta_bg; /* First metablock block group */ __le32 s_mkfs_time; /* When the filesystem was created */ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ - /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ + /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ __le32 s_r_blocks_count_hi; /* Reserved blocks count */ __le32 s_free_blocks_count_hi; /* Free blocks count */ diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 
4493ef0c715e..cdf9bfe10137 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb, keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; + if (keys[1].fmr_physical < keys[0].fmr_physical) + return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 2b42ececa46d..1602d74b5eeb 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode) (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d251d705c276..bf0b7dea4900 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4797,8 +4797,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode, if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); - return ext4_find_inline_data_nolock(inode); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return err; } else EXT4_I(inode)->i_inline_off = 0; return 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 12435d61f09e..f9a430152063 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -431,6 +431,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ei_bl->i_flags = 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); + EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 94608b7df7e8..31e21de56432 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1595,11 +1595,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, int has_inline_data = 1; ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); - if (has_inline_data) { - if (inlined) - *inlined = 1; + if (inlined) + *inlined = has_inline_data; + if (has_inline_data) goto cleanup_and_exit; - } } if ((namelen <= 2) && (name[0] == '.') && @@ -3646,7 +3645,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, * so the old->de may no longer valid and need to find it again * before reset old inode info. */ - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) retval = PTR_ERR(old.bh); if (!old.bh) @@ -3813,9 +3813,20 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, return retval; } - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); - if (IS_ERR(old.bh)) - return PTR_ERR(old.bh); + /* + * We need to protect against old.inode directory getting converted + * from inline directory format into a normal one. + */ + if (S_ISDIR(old.inode->i_mode)) + inode_lock_nested(old.inode, I_MUTEX_NONDIR2); + + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); + if (IS_ERR(old.bh)) { + retval = PTR_ERR(old.bh); + goto unlock_moved_dir; + } + /* * Check for inode number is _not_ due to possible IO errors. 
* We might rmdir the source, keep it as pwd of some process @@ -3872,11 +3883,6 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - /* - * We need to protect against old.inode directory getting - * converted from inline directory format into a normal one. - */ - inode_lock_nested(old.inode, I_MUTEX_NONDIR2); retval = ext4_rename_dir_prepare(handle, &old); if (retval) { inode_unlock(old.inode); @@ -4013,12 +4019,15 @@ end_rename: } else { ext4_journal_stop(handle); } - if (old.dir_bh) - inode_unlock(old.inode); release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); + +unlock_moved_dir: + if (S_ISDIR(old.inode->i_mode)) + inode_unlock(old.inode); + return retval; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index beaec6d81074..1e4db96a04e6 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io, static void io_submit_add_bh(struct ext4_io_submit *io, struct inode *inode, - struct page *page, + struct page *pagecache_page, + struct page *bounce_page, struct buffer_head *bh) { int ret; @@ -421,10 +422,11 @@ submit_and_retry: } if (io->io_bio == NULL) io_submit_init_bio(io, bh); - ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); + ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page, + bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; - wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size); + wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size); io->io_next_block++; } @@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - io_submit_add_bh(io, inode, - bounce_page ? bounce_page : page, bh); + io_submit_add_bh(io, inode, page, bounce_page, bh); } while ((bh = bh->b_this_page) != head); unlock: unlock_page(page); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88f7b8a88c76..f43e526112ae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5726,6 +5726,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, return journal_inode; } +static int ext4_journal_bmap(journal_t *journal, sector_t *block) +{ + struct ext4_map_blocks map; + int ret; + + if (journal->j_inode == NULL) + return 0; + + map.m_lblk = *block; + map.m_len = 1; + ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0); + if (ret <= 0) { + ext4_msg(journal->j_inode->i_sb, KERN_CRIT, + "journal bmap failed: block %llu ret %d\n", + *block, ret); + jbd2_journal_abort(journal, ret ? 
ret : -EIO); + return ret; + } + *block = map.m_pblk; + return 0; +} + static journal_t *ext4_get_journal(struct super_block *sb, unsigned int journal_inum) { @@ -5746,6 +5768,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, return NULL; } journal->j_private = sb; + journal->j_bmap = ext4_journal_bmap; ext4_init_journal_params(sb, journal); return journal; } @@ -5920,6 +5943,7 @@ static int ext4_load_journal(struct super_block *sb, err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); + if (save) memcpy(save, ((char *) es) + EXT4_S_ERR_START, EXT4_S_ERR_LEN); @@ -5928,6 +5952,14 @@ static int ext4_load_journal(struct super_block *sb, memcpy(((char *) es) + EXT4_S_ERR_START, save, EXT4_S_ERR_LEN); kfree(save); + es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state & + EXT4_ERROR_FS); + /* Write out restored error information to the superblock */ + if (!bdev_read_only(sb->s_bdev)) { + int err2; + err2 = ext4_commit_super(sb); + err = err ? : err2; + } } if (err) { @@ -6157,11 +6189,13 @@ static int ext4_clear_journal_err(struct super_block *sb, errstr = ext4_decode_error(sb, j_errno, nbuf); ext4_warning(sb, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, "Marking fs in need of filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb); + j_errno = ext4_commit_super(sb); + if (j_errno) + return j_errno; + ext4_warning(sb, "Marked fs in need of filesystem check."); jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index e2b8b3437c58..12d6252e3e22 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -501,13 +501,13 @@ static const struct sysfs_ops ext4_attr_ops = { .store = ext4_attr_store, }; -static struct kobj_type ext4_sb_ktype = { +static const struct kobj_type ext4_sb_ktype = { .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; -static struct kobj_type ext4_feat_ktype = { +static const struct kobj_type ext4_feat_ktype = { .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_feat_release, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 62f2ec599218..767454d74cd6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2852,6 +2852,9 @@ shift: (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; + if (ext4_has_inline_data(inode)) + error = ext4_find_inline_data_nolock(inode); + cleanup: if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. 
Delete some EAs or run e2fsck.", diff --git a/fs/file.c b/fs/file.c index c942c89ca4cd..7893ea161d77 100644 --- a/fs/file.c +++ b/fs/file.c @@ -642,6 +642,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd) if (fd >= fdt->max_fds) return NULL; + fd = array_index_nospec(fd, fdt->max_fds); file = fdt->fd[fd]; if (file) { rcu_assign_pointer(fdt->fd[fd], NULL); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e80c781731f8..8ae419152ff6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -969,10 +969,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, { int err = 0; unsigned long long ret; - sector_t block = 0; + sector_t block = blocknr; - if (journal->j_inode) { - block = blocknr; + if (journal->j_bmap) { + err = journal->j_bmap(journal, &block); + if (err == 0) + *retp = block; + } else if (journal->j_inode) { ret = bmap(journal->j_inode, &block); if (ret || !block) { diff --git a/fs/locks.c b/fs/locks.c index 66b4eef09db5..df8b26a42524 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1863,9 +1863,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct inode *inode = file_inode(filp); + vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error; - if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) + if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -2425,7 +2426,6 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) if (flock->l_pid != 0) goto out; - cmd = F_GETLK64; fl->fl_flags |= FL_OFDLCK; fl->fl_owner = filp; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e7462b5e5f1e..502e1b7742db 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1104,7 +1104,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); + file_start_write(file); host_err = vfs_iter_write(file, &iter, &pos, flags); + file_end_write(file); if (host_err < 0) { nfsd_reset_write_verifier(nn); trace_nfsd_writeverf_reset(nn, rqstp, host_err); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1d65f6ef00ca..0394505fdce3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (unlikely(copied < len) && wc->w_target_page) { + loff_t new_isize; + if (!PageUptodate(wc->w_target_page)) copied = 0; - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, - start+len); + new_isize = max_t(loff_t, i_size_read(inode), pos + copied); + if (new_isize > page_offset(wc->w_target_page)) + ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + start+len); + else { + /* + * When page is fully beyond new isize (data copy + * failed), do not bother zeroing the page. Invalidate + * it instead so that writeback does not get confused + * put page & buffer dirty bits into inconsistent + * state. + */ + block_invalidate_folio(page_folio(wc->w_target_page), + 0, PAGE_SIZE); + } } if (wc->w_target_page) flush_dcache_page(wc->w_target_page); diff --git a/fs/splice.c b/fs/splice.c index 2e76dbb81a8f..2c3dec2b6dfa 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -937,7 +937,6 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, /* * Do the splice. 
*/ - ret = 0; bytes = 0; len = sd->total_len; flags = sd->flags; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41734202796f..2ef78aa1d3f6 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -449,15 +449,17 @@ xfs_prepare_ioend( } /* - * If the page has delalloc blocks on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip up a later direct I/O read operation on the same region. + * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. * - * We prevent this by truncating away the delalloc regions on the page. Because + * We prevent this by truncating away the delalloc regions on the folio. Because * they are delalloc, we can do this without needing a transaction. Indeed - if * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why we - * see a ENOSPC in writeback). + * transaction as there is no space left for block reservation (typically why + * we see a ENOSPC in writeback). */ static void xfs_discard_folio( @@ -475,8 +477,13 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); + /* + * The end of the punch range is always the offset of the the first + * byte of the next folio. Hence the end offset is only dependent on the + * folio itself and not the start offset that is passed in. + */ error = xfs_bmap_punch_delalloc_range(ip, pos, - round_up(pos, folio_size(folio))); + folio_pos(folio) + folio_size(folio)); if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7dc0db7f5a76..6abcc34fafd8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1321,15 +1321,14 @@ xfs_qm_quotacheck( error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, NULL); - if (error) { - /* - * The inode walk may have partially populated the dquot - * caches. We must purge them before disabling quota and - * tearing down the quotainfo, or else the dquots will leak. - */ - xfs_qm_dqpurge_all(mp); - goto error_return; - } + + /* + * On error, the inode walk may have partially populated the dquot + * caches. We must purge them before disabling quota and tearing down + * the quotainfo, or else the dquots will leak. + */ + if (error) + goto error_purge; /* * We've made all the changes that we need to make incore. Flush them @@ -1363,10 +1362,8 @@ xfs_qm_quotacheck( * and turn quotaoff. The dquots won't be attached to any of the inodes * at this point (because we intentionally didn't in dqget_noattach). */ - if (error) { - xfs_qm_dqpurge_all(mp); - goto error_return; - } + if (error) + goto error_purge; /* * If one type of quotas is off, then it will lose its @@ -1376,7 +1373,7 @@ xfs_qm_quotacheck( mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD; mp->m_qflags |= flags; - error_return: +error_return: xfs_buf_delwri_cancel(&buffer_list); if (error) { @@ -1395,6 +1392,21 @@ xfs_qm_quotacheck( } else xfs_notice(mp, "Quotacheck: Done."); return error; + +error_purge: + /* + * On error, we may have inodes queued for inactivation. 
This may try + * to attach dquots to the inode before running cleanup operations on + * the inode and this can race with the xfs_qm_destroy_quotainfo() call + * below that frees mp->m_quotainfo. To avoid this race, flush all the + * pending inodegc operations before we purge the dquots from memory, + * ensuring that background inactivation is idle whilst we turn off + * quotas. + */ + xfs_inodegc_flush(mp); + xfs_qm_dqpurge_all(mp); + goto error_return; + } /* |