From 428f651cb80b227af47fc302e4931791f2fb4741 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 17 Jan 2022 10:25:07 -0500 Subject: gfs2: assign rgrp glock before compute_bitstructs Before this patch, function read_rindex_entry called compute_bitstructs before it allocated a glock for the rgrp. But if compute_bitstructs found a problem with the rgrp, it called gfs2_consist_rgrpd, and that called gfs2_dump_glock for rgd->rd_gl which had not yet been assigned. read_rindex_entry compute_bitstructs gfs2_consist_rgrpd gfs2_dump_glock <---------rgd->rd_gl was not set. This patch changes read_rindex_entry so it assigns an rgrp glock before calling compute_bitstructs so gfs2_dump_glock does not reference an unassigned pointer. If an error is discovered, the glock must also be put, so a new goto and label were added. Reported-by: syzbot+c6fd14145e2f62ca0784@syzkaller.appspotmail.com Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0fb3c01bc557..9b04a570c582 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -922,15 +922,15 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_lock_init(&rgd->rd_rsspin); mutex_init(&rgd->rd_mutex); - error = compute_bitstructs(rgd); - if (error) - goto fail; - error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) @@ -944,6 +944,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: -- cgit v1.2.3 From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 10 Dec 2021 14:43:36 +0100 Subject: gfs2: gfs2_setattr_size error path fix When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get rid of any reservations the inode may have. Instead, it should pass in the inode's write count as the second parameter to allow gfs2_rs_delete() to figure out if the inode has any writers left. In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left where we know that there can be no other users of the inode. Replace those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write count check. With that, gfs2_rs_delete() is only called with the inode's actual write count, so get rid of the second parameter. Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/file.c | 2 +- fs/gfs2/inode.c | 2 +- fs/gfs2/rgrp.c | 7 ++++--- fs/gfs2/rgrp.h | 2 +- fs/gfs2/super.c | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148..fbdb7a30470a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8c39a8571b1f..5bac4f6e8e05 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb..66a123306aec 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -793,7 +793,7 @@ fail_free_inode: if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9b04a570c582..9df8a84f85a6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb4305..46dd94e9e085 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 64c67090f503..143a47359d1b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1396,7 +1396,7 @@ out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); -- cgit v1.2.3 From a892b12393afb74c397dc752eaba1e96e2702303 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 2 Feb 2022 11:00:48 +0100 Subject: gfs2: Expect -EBUSY after canceling dlm locking requests Due to the asynchronous nature of the dlm api, when we request a pending locking request to be canceled with dlm_unlock(DLM_LKF_CANCEL), the locking request will either complete before it could be canceled, or the cancellation will succeed. In either case, gdlm_ast will be called once and the status will indicate the outcome of the locking request, with -DLM_ECANCEL indicating a canceled request. Inside dlm, when a locking request completes before its cancel request could be processed, gdlm_ast will be called, but the lock will still be considered busy until a DLM_MSG_CANCEL_REPLY message completes the cancel request. During that time, successive dlm_lock() or dlm_unlock() requests for that lock will return -EBUSY. In other words, waiting for the gdlm_ast call before issuing the next locking request is not enough. There is no way of waiting for a cancel request to actually complete, either. We rarely cancel locking requests, but when we do, we don't know when the next locking request for that lock will occur. This means that any dlm_lock() or dlm_unlock() call can potentially return -EBUSY. When that happens, this patch simply repeats the request after a short pause. This workaround could be improved upon by tracking for which dlm locks cancel requests have been issued, but that isn't strictly necessary and it would complicate the code. We haven't seen -EBUSY errors from dlm without cancel requests. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lock_dlm.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 50578f881e6d..2559a79cf14b 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -261,6 +261,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, int req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; + int error; req = make_mode(gl->gl_name.ln_sbd, req_state); lkf = make_flags(gl, flags, req); @@ -279,8 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, * Submit the actual lock request. */ - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, +again: + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + if (error == -EBUSY) { + msleep(20); + goto again; + } + return error; } static void gdlm_put_lock(struct gfs2_glock *gl) @@ -312,8 +319,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl) return; } +again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); + if (error == -EBUSY) { + msleep(20); + goto again; + } + if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, -- cgit v1.2.3 From 1fc05c8d8426d4085a219c23f8855c4aaf9e3ffb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 24 Jan 2022 12:23:55 -0500 Subject: gfs2: cancel timed-out glock requests The gfs2 evict code tries to upgrade the iopen glock from SH to EX. If the attempt to upgrade times out, gfs2 needs to tell dlm to cancel the lock request or it can deadlock. We also need to wake up the process waiting for the lock when dlm sends its AST back to gfs2. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6b23399eaee0..d368d9a2e8f0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -669,6 +669,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) /* Check for state != intended state */ if (unlikely(state != gl->gl_target)) { + if (gh && (ret & LM_OUT_CANCELED)) + gfs2_holder_wake(gh); if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { /* move to back of queue and try next entry */ if (ret & LM_OUT_CANCELED) { @@ -1691,6 +1693,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; spin_lock(&gl->gl_lockref.lock); + if (list_is_first(&gh->gh_list, &gl->gl_holders) && + !test_bit(HIF_HOLDER, &gh->gh_iflags)) { + spin_unlock(&gl->gl_lockref.lock); + gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); + } + __gfs2_glock_dq(gh); spin_unlock(&gl->gl_lockref.lock); } -- cgit v1.2.3 From 29464ee36bcaaee2691249f49b9592b8d5c97ece Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 24 Jan 2022 12:23:57 -0500 Subject: gfs2: Switch lock order of inode and iopen glock This patch tries to fix the continual ABBA deadlocks we keep having between the iopen and inode glocks. This switches the lock order in gfs2_inode_lookup and gfs2_create_inode so the iopen glock is always locked first. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/inode.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 66a123306aec..c8ec876f33ea 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -131,7 +131,21 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_glock *io_gl; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, + &ip->i_gl); + if (unlikely(error)) + goto fail; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, + &io_gl); + if (unlikely(error)) + goto fail; + + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(io_gl); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, + &ip->i_iopen_gh); + gfs2_glock_put(io_gl); if (unlikely(error)) goto fail; @@ -161,16 +175,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(io_gl); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - gfs2_glock_put(io_gl); - if (unlikely(error)) - goto fail; - /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */ inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); inode->i_atime.tv_nsec = 0; @@ -716,13 +720,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); BUG_ON(error); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock3; + error = gfs2_trans_begin(sdp, blocks, 0); if (error) - goto fail_gunlock2; + goto fail_gunlock3; if (blocks > 1) { ip->i_eattr = ip->i_no_addr + 1; @@ -731,10 +739,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - if (error) - goto fail_gunlock2; - glock_set_object(ip->i_gl, ip); glock_set_object(io_gl, ip); gfs2_set_iop(inode); @@ -745,14 +749,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(default_acl); default_acl = NULL; } if (acl) { error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(acl); acl = NULL; } @@ -760,11 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, &gfs2_initxattrs, NULL); if (error) - goto fail_gunlock3; + goto fail_gunlock4; error = link_dinode(dip, name, ip, &da); if (error) - goto fail_gunlock3; + goto fail_gunlock4; mark_inode_dirty(inode); d_instantiate(dentry, inode); @@ -782,9 +786,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, unlock_new_inode(inode); return error; -fail_gunlock3: +fail_gunlock4: glock_clear_object(ip->i_gl, ip); glock_clear_object(io_gl, ip); +fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); -- cgit v1.2.3 From 5a27a43efd1d5cd55dd5988cac596bdea7294d73 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Jan 2022 02:54:55 +0100 Subject: gfs2: Make use of list_is_first Use list_is_first() instead of open-coding it. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d368d9a2e8f0..a12bb3e8cb9c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -542,7 +542,7 @@ restart: * some reason. If this holder is the head of the list, it * means we have a blocked holder at the head, so return 1. */ - if (gh->gh_list.prev == &gl->gl_holders) + if (list_is_first(&gh->gh_list, &gl->gl_holders)) return 1; do_error(gl, 0); break; -- cgit v1.2.3 From a4e8145edcfd66b69056c59885770b428eccca59 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 14 Jan 2022 09:05:13 +0100 Subject: gfs2: Initialize gh_error in gfs2_glock_nq The gh_error field if a glock holder is initialized to zero in gfs2_holder_init(). When a locking operation fails, gh_error is set to an error code; when it succeeds, the gh_error value is left unchanged. The field isn't initialized in gfs2_holder_reinit(), which is a problem. Instead of fixing that directly, initialize gh_error in gfs2_glock_nq(). That also obsoletes the assignment in do_flock(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 1 - fs/gfs2/glock.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 5bac4f6e8e05..a743a9eb602f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1497,7 +1497,6 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) if (error != GLR_TRYFAILED) break; fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; - fl_gh->gh_error = 0; msleep(sleeptime); } if (error) { diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a12bb3e8cb9c..630c6550eacf 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1261,7 +1261,6 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; - gh->gh_error = 0; gh->gh_iflags = 0; gfs2_glock_hold(gl); } @@ -1567,6 +1566,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); + gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && -- cgit v1.2.3 From b2963932346f0631c5145f656a22d4237f0956fa Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 3 Mar 2022 07:32:43 -0500 Subject: gfs2: Remove return value for gfs2_indirect_init The return value from function gfs2_indirect_init is never used, so remove it. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fbdb7a30470a..39080b2d6cf8 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -606,9 +606,9 @@ out: return ret; } -static inline __be64 *gfs2_indirect_init(struct metapath *mp, - struct gfs2_glock *gl, unsigned int i, - unsigned offset, u64 bn) +static inline void gfs2_indirect_init(struct metapath *mp, + struct gfs2_glock *gl, unsigned int i, + unsigned offset, u64 bn) { __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + ((i > 1) ? sizeof(struct gfs2_meta_header) : @@ -621,7 +621,6 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp, gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); ptr += offset; *ptr = cpu_to_be64(bn); - return ptr; } enum alloc_state { -- cgit v1.2.3 From bb7f5d96aaa87d5aee2f5eb98ae0b84f08988489 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 7 Mar 2022 23:30:03 +0100 Subject: gfs2: Fix should_fault_in_pages() logic Fix the fault-in window size logic: * Use a maximum window size of 1 MiB instead of BIO_MAX_VECS * PAGE_SIZE. The previous window size was always one page because the pages variable was accidentally being defined and then redefined in should_fault_in_pages(). * The nr_dirtied heuristic for guessing when there might be memory pressure often results in very small window sizes. Don't let nr_dirtied drop below 8 pages (as btrfs does). * Compute the window size in units of bytes, not pages. * Account for page overlap (unaligned iterators). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a743a9eb602f..0c8cf10281da 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -775,8 +775,7 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, size_t *window_size) { size_t count = iov_iter_count(i); - char __user *p; - int pages = 1; + size_t size, offs; if (likely(!count)) return false; @@ -785,18 +784,20 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, if (!iter_is_iovec(i)) return false; + size = PAGE_SIZE; + offs = offset_in_page(i->iov[0].iov_base + i->iov_offset); if (*prev_count != count || !*window_size) { - int pages, nr_dirtied; + size_t nr_dirtied; - pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE)); + size = ALIGN(offs + count, PAGE_SIZE); + size = min_t(size_t, size, SZ_1M); nr_dirtied = max(current->nr_dirtied_pause - - current->nr_dirtied, 1); - pages = min(pages, nr_dirtied); + current->nr_dirtied, 8); + size = min(size, nr_dirtied << PAGE_SHIFT); } *prev_count = count; - p = i->iov[0].iov_base + i->iov_offset; - *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p); + *window_size = size - offs; return true; } -- cgit v1.2.3 From 52f3f033a5dbd023307520af1ff551cadfd7f037 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2022 18:32:02 +0100 Subject: gfs2: Disable page faults during lockless buffered reads During lockless buffered reads, filemap_read() holds page cache page references while trying to copy data to the user-space buffer. The calling process isn't holding the inode glock, but the page references it holds prevent those pages from being removed from the page cache, and that prevents the underlying inode glock from being moved to another node. Thus, we can end up in the same kinds of distributed deadlock situations as with normal (non-lockless) buffered reads. Fix that by disabling page faults during lockless reads as well. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 0c8cf10281da..44132e4c8789 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -957,14 +957,16 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; iocb->ki_flags &= ~IOCB_DIRECT; } + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); iocb->ki_flags &= ~IOCB_NOIO; + pagefault_enable(); if (ret >= 0) { if (!iov_iter_count(to)) return ret; written = ret; - } else { + } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; if (iocb->ki_flags & IOCB_NOWAIT) -- cgit v1.2.3 From 124c458a401a2497f796e4f2d6cafac6edbea8e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Mar 2022 14:20:38 +0100 Subject: gfs2: Minor retry logic cleanup Clean up the retry logic in the read and write functions somewhat. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 44132e4c8789..44bb886eefce 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -852,9 +852,9 @@ retry_under_glock: leftover = fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(gh); if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + goto retry; } } if (gfs2_holder_queued(gh)) @@ -921,9 +921,9 @@ retry_under_glock: leftover = fault_in_iov_iter_readable(from, window_size); gfs2_holder_disallow_demote(gh); if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + goto retry; } } out: @@ -992,12 +992,11 @@ retry_under_glock: leftover = fault_in_iov_iter_writeable(to, window_size); gfs2_holder_disallow_demote(&gh); if (leftover != window_size) { - if (!gfs2_holder_queued(&gh)) { - if (written) - goto out_uninit; - goto retry; - } - goto retry_under_glock; + if (gfs2_holder_queued(&gh)) + goto retry_under_glock; + if (written) + goto out_uninit; + goto retry; } } if (gfs2_holder_queued(&gh)) @@ -1071,12 +1070,11 @@ retry_under_glock: gfs2_holder_disallow_demote(gh); if (leftover != window_size) { from->count = min(from->count, window_size - leftover); - if (!gfs2_holder_queued(gh)) { - if (read) - goto out_uninit; - goto retry; - } - goto retry_under_glock; + if (gfs2_holder_queued(gh)) + goto retry_under_glock; + if (read) + goto out_uninit; + goto retry; } } out_unlock: -- cgit v1.2.3 From 46f3e0421ccb5474b5c006b0089b9dfd42534bb6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Mar 2022 14:47:24 +0100 Subject: gfs2: Fix gfs2_file_buffered_write endless loop workaround Since commit 554c577cee95b, gfs2_file_buffered_write() can accidentally return a truncated iov_iter, which might confuse callers. Fix that. Fixes: 554c577cee95b ("gfs2: Prevent endless loops in gfs2_file_buffered_write") Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 44bb886eefce..19a038bc33bc 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1084,6 +1084,7 @@ out_uninit: gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); + from->count = orig_count - read; return read ? read : ret; } -- cgit v1.2.3 From 11661835f90153bdfc5325e550d2b72d0f47cb3e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2022 22:40:20 +0100 Subject: gfs2: Remove dead code in gfs2_file_read_iter Function iomap_dio_rw() only returns -ENOTBLK for write requests and gfs2_file_direct_read() no longer returns -ENOTBLK since commit 1d45bb7f9d2a5 ("gfs2: Use iomap for stuffed direct I/O reads"), so there is no need to check for -ENOTBLK in gfs2_file_read_iter() anymore. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 19a038bc33bc..edc588465a4b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -951,12 +951,9 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * and retry. */ - if (iocb->ki_flags & IOCB_DIRECT) { - ret = gfs2_file_direct_read(iocb, to, &gh); - if (likely(ret != -ENOTBLK)) - return ret; - iocb->ki_flags &= ~IOCB_DIRECT; - } + if (iocb->ki_flags & IOCB_DIRECT) + return gfs2_file_direct_read(iocb, to, &gh); + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); -- cgit v1.2.3 From 3bde4c48586074202044456285a97ccdf9048988 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2022 23:13:26 +0100 Subject: gfs2: Make sure not to return short direct writes When direct writes fail with -ENOTBLK because we're writing into a hole (gfs2_iomap_begin()) or because of a page invalidation failure (iomap_dio_rw()), we're falling back to buffered writes. In that case, when we lose the inode glock in gfs2_file_buffered_write(), we want to re-acquire it instead of returning a short write. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index edc588465a4b..22b41acfbbc3 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1069,7 +1069,7 @@ retry_under_glock: from->count = min(from->count, window_size - leftover); if (gfs2_holder_queued(gh)) goto retry_under_glock; - if (read) + if (read && !(iocb->ki_flags & IOCB_DIRECT)) goto out_uninit; goto retry; } -- cgit v1.2.3 From 27ca8273fda398638ca994a207323a85b6d81190 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Tue, 22 Mar 2022 19:05:51 +0000 Subject: gfs2: Make sure FITRIM minlen is rounded up to fs block size Per fstrim(8) we must round up the minlen argument to the fs block size. The current calculation doesn't take into account devices that have a discard granularity and requested minlen less than 1 fs block, so the value can get shifted away to zero in the translation to fs blocks. The zero minlen passed to gfs2_rgrp_send_discards() then allows sb_issue_discard() to be called with nr_sects == 0 which returns -EINVAL and results in gfs2_rgrp_send_discards() returning -EIO. Make sure minlen is never < 1 fs block by taking the max of the requested minlen and the fs block size before comparing to the device's discard granularity and shifting to fs blocks. Fixes: 076f0faa764ab ("GFS2: Fix FITRIM argument handling") Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9df8a84f85a6..801ad9f4f2be 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1417,7 +1417,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) -- cgit v1.2.3