diff options
author | Bob Peterson <rpeterso@redhat.com> | 2021-10-06 17:29:18 +0300 |
---|---|---|
committer | Andreas Gruenbacher <agruenba@redhat.com> | 2021-10-25 09:42:19 +0300 |
commit | f2e70d8f2fdff0707b3f4de4ef87f93e4396320c (patch) | |
tree | 04bba85368b8717b15871a28c3bc058c9dc93c85 | |
parent | e6f856008d2364a16610d6269b6b38503d5e41a4 (diff) | |
download | linux-f2e70d8f2fdff0707b3f4de4ef87f93e4396320c.tar.xz |
gfs2: fix GL_SKIP node_scope problems
Before this patch, when a glock was locked, the very first holder on the
queue would unlock the lockref and call the go_instantiate glops function
(if one existed), unless GL_SKIP was specified. When we introduced the new
node-scope concept, we allowed multiple holders to lock glocks in EX mode
and share the lock.
But node-scope introduced a new problem: if the first holder has GL_SKIP
and the next one does NOT, since it is not the first holder on the queue,
the go_instantiate op was not called. Eventually the GL_SKIP holder may
call the instantiate sub-function (e.g. gfs2_rgrp_bh_get) but there was
still a window of time in which another non-GL_SKIP holder assumes the
instantiate function had been called by the first holder. In the case of
rgrp glocks, this led to a NULL pointer dereference on the buffer_heads.
This patch tries to fix the problem by introducing two new glock flags:
GLF_INSTANTIATE_NEEDED, which keeps track of when the instantiate function
needs to be called to "fill in" or "read in" the object before it is
referenced.
GLF_INSTANTIATE_IN_PROG which is used to determine when a process is
in the process of reading in the object. Whenever a function needs to
reference the object, it checks the GLF_INSTANTIATE_NEEDED flag, and if
set, it sets GLF_INSTANTIATE_IN_PROG and calls the glops "go_instantiate"
function.
As before, the gl_lockref spin_lock is unlocked during the IO operation,
which may take a relatively long amount of time to complete. While
unlocked, if another process determines go_instantiate is still needed,
it sees GLF_INSTANTIATE_IN_PROG is set, and waits for the go_instantiate
glop operation to be completed. Once GLF_INSTANTIATE_IN_PROG is cleared,
it needs to check GLF_INSTANTIATE_NEEDED again because the other process's
go_instantiate operation may not have been successful.
Functions that previously called the instantiate sub-functions now call
directly into gfs2_instantiate so the new bits are managed properly.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
-rw-r--r-- | fs/gfs2/glock.c | 42 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 1 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 10 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 2 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 3 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 21 | ||||
-rw-r--r-- | fs/gfs2/super.c | 2 |
7 files changed, 61 insertions, 20 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 75d54ed7e54e..08073dcaf30e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -479,12 +479,42 @@ find_first_strong_holder(struct gfs2_glock *gl) * Returns: 0 if instantiate was successful, 2 if type specific operation is * underway, or error. */ -static int gfs2_instantiate(struct gfs2_holder *gh) +int gfs2_instantiate(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; + int ret; + +again: + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)) + return 0; - return glops->go_instantiate(gh); + /* + * Since we unlock the lockref lock, we set a flag to indicate + * instantiate is in progress. + */ + if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) { + wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG, + TASK_UNINTERRUPTIBLE); + /* + * Here we just waited for a different instantiate to finish. + * But that may not have been successful, as when a process + * locks an inode glock _before_ it has an actual inode to + * instantiate into. So we check again. This process might + * have an inode to instantiate, so might be successful. + */ + goto again; + } + + set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + + ret = glops->go_instantiate(gh); + if (!ret) + clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); + clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + smp_mb__after_atomic(); + wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG); + return ret; } /** @@ -526,7 +556,7 @@ restart: incompat_holders_demoted = true; first_gh = gh; } - if (gh->gh_list.prev == &gl->gl_holders && + if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) && !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) { lock_released = true; spin_unlock(&gl->gl_lockref.lock); @@ -1162,7 +1192,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_inc(&sdp->sd_glock_disposal); gl->gl_node.next = NULL; - gl->gl_flags = 0; + gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0; gl->gl_name = name; lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; @@ -2326,6 +2356,10 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'P'; if (test_bit(GLF_FREEING, gflags)) *p++ = 'x'; + if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) + *p++ = 'n'; + if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) + *p++ = 'N'; *p = 0; return buf; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index a36104229d0d..4f8642301801 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -204,6 +204,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags, extern void gfs2_holder_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq(struct gfs2_holder *gh); extern int gfs2_glock_poll(struct gfs2_holder *gh); +extern int gfs2_instantiate(struct gfs2_holder *gh); extern int gfs2_glock_wait(struct gfs2_holder *gh); extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq(struct gfs2_holder *gh); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 8452a83bd55a..e2656baf38a3 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -357,6 +357,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) truncate_inode_pages(mapping, 0); if (ip) { set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); forget_all_cached_acls(&ip->i_inode); security_inode_invalidate_secctx(&ip->i_inode); gfs2_dir_hash_inval(ip); @@ -495,13 +496,13 @@ static int inode_go_instantiate(struct gfs2_holder *gh) struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip) - return 0; + if (!ip) /* no inode to populate - read it in later */ + goto out; if (test_bit(GIF_INVALID, &ip->i_flags)) { error = gfs2_inode_refresh(ip); if (error) - return error; + goto out; } if (gh->gh_state != LM_ST_DEFERRED) @@ -515,9 +516,10 @@ static int inode_go_instantiate(struct gfs2_holder *gh) list_add(&ip->i_trunc_list, &sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); wake_up(&sdp->sd_quota_wait); - return 1; + error = 1; } +out: return error; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index af632dc65231..19a4c6132c67 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -316,6 +316,7 @@ struct gfs2_alloc_parms { enum { GLF_LOCK = 1, + GLF_INSTANTIATE_NEEDED = 2, /* needs instantiate */ GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DEMOTE_IN_PROGRESS = 5, @@ -325,6 +326,7 @@ enum { GLF_REPLY_PENDING = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, + GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d8cd835c560a..940d3e37250d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -183,6 +183,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, glock_set_object(ip->i_gl, ip); set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; @@ -196,7 +197,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (type == DT_UNKNOWN) { /* Inode glock must be locked already */ - error = gfs2_inode_refresh(GFS2_I(inode)); + error = gfs2_instantiate(&i_gh); if (error) goto fail; } else { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index cdcbc822064d..5ee7da3a450e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1238,8 +1238,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - } - else if (sdp->sd_args.ar_rgrplvb) { + } else if (sdp->sd_args.ar_rgrplvb) { if (!gfs2_rgrp_lvb_valid(rgd)){ gfs2_consist_rgrpd(rgd); error = -EIO; @@ -1257,11 +1256,10 @@ fail: bi->bi_bh = NULL; gfs2_assert_warn(sdp, !bi->bi_clone); } - return error; } -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; @@ -1269,7 +1267,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) - return gfs2_rgrp_bh_get(rgd); + return gfs2_instantiate(gh); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; @@ -1312,6 +1310,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } + set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -2110,7 +2109,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rgd, + &ip->i_rgd_gh); if (unlikely(error)) { rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); @@ -2125,8 +2125,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); + if (sdp->sd_args.ar_rgrplvb) { + error = gfs2_instantiate(&ip->i_rgd_gh); + if (error) + goto skip_rgrp; + } /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) @@ -2762,8 +2765,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) void rgrp_lock_local(struct gfs2_rgrpd *rgd) { - GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) && - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); mutex_lock(&rgd->rd_mutex); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6e00d15ef0a8..26c726580041 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1245,7 +1245,7 @@ static enum dinode_demise evict_should_delete(struct inode *inode, return SHOULD_NOT_DELETE_DINODE; if (test_bit(GIF_INVALID, &ip->i_flags)) { - ret = gfs2_inode_refresh(ip); + ret = gfs2_instantiate(gh); if (ret) return SHOULD_NOT_DELETE_DINODE; } |