summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Peterson <rpeterso@redhat.com>2021-10-06 17:29:18 +0300
committerAndreas Gruenbacher <agruenba@redhat.com>2021-10-25 09:42:19 +0300
commitf2e70d8f2fdff0707b3f4de4ef87f93e4396320c (patch)
tree04bba85368b8717b15871a28c3bc058c9dc93c85
parente6f856008d2364a16610d6269b6b38503d5e41a4 (diff)
downloadlinux-f2e70d8f2fdff0707b3f4de4ef87f93e4396320c.tar.xz
gfs2: fix GL_SKIP node_scope problems
Before this patch, when a glock was locked, the very first holder on the queue would unlock the lockref and call the go_instantiate glops function (if one existed), unless GL_SKIP was specified. When we introduced the new node-scope concept, we allowed multiple holders to lock glocks in EX mode and share the lock. But node-scope introduced a new problem: if the first holder has GL_SKIP and the next one does NOT, since it is not the first holder on the queue, the go_instantiate op was not called. Eventually the GL_SKIP holder may call the instantiate sub-function (e.g. gfs2_rgrp_bh_get) but there was still a window of time in which another non-GL_SKIP holder assumes the instantiate function had been called by the first holder. In the case of rgrp glocks, this led to a NULL pointer dereference on the buffer_heads. This patch tries to fix the problem by introducing two new glock flags: GLF_INSTANTIATE_NEEDED, which keeps track of when the instantiate function needs to be called to "fill in" or "read in" the object before it is referenced. GLF_INSTANTIATE_IN_PROG which is used to determine when a process is in the process of reading in the object. Whenever a function needs to reference the object, it checks the GLF_INSTANTIATE_NEEDED flag, and if set, it sets GLF_INSTANTIATE_IN_PROG and calls the glops "go_instantiate" function. As before, the gl_lockref spin_lock is unlocked during the IO operation, which may take a relatively long amount of time to complete. While unlocked, if another process determines go_instantiate is still needed, it sees GLF_INSTANTIATE_IN_PROG is set, and waits for the go_instantiate glop operation to be completed. Once GLF_INSTANTIATE_IN_PROG is cleared, it needs to check GLF_INSTANTIATE_NEEDED again because the other process's go_instantiate operation may not have been successful. Functions that previously called the instantiate sub-functions now call directly into gfs2_instantiate so the new bits are managed properly. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
-rw-r--r--fs/gfs2/glock.c42
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/glops.c10
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/gfs2/inode.c3
-rw-r--r--fs/gfs2/rgrp.c21
-rw-r--r--fs/gfs2/super.c2
7 files changed, 61 insertions, 20 deletions
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 75d54ed7e54e..08073dcaf30e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -479,12 +479,42 @@ find_first_strong_holder(struct gfs2_glock *gl)
* Returns: 0 if instantiate was successful, 2 if type specific operation is
* underway, or error.
*/
-static int gfs2_instantiate(struct gfs2_holder *gh)
+int gfs2_instantiate(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
const struct gfs2_glock_operations *glops = gl->gl_ops;
+ int ret;
+
+again:
+ if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
+ return 0;
- return glops->go_instantiate(gh);
+ /*
+ * Since we unlock the lockref lock, we set a flag to indicate
+ * instantiate is in progress.
+ */
+ if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
+ wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
+ TASK_UNINTERRUPTIBLE);
+ /*
+ * Here we just waited for a different instantiate to finish.
+ * But that may not have been successful, as when a process
+ * locks an inode glock _before_ it has an actual inode to
+ * instantiate into. So we check again. This process might
+ * have an inode to instantiate, so might be successful.
+ */
+ goto again;
+ }
+
+ set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
+
+ ret = glops->go_instantiate(gh);
+ if (!ret)
+ clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
+ clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG);
+ return ret;
}
/**
@@ -526,7 +556,7 @@ restart:
incompat_holders_demoted = true;
first_gh = gh;
}
- if (gh->gh_list.prev == &gl->gl_holders &&
+ if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) &&
!(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) {
lock_released = true;
spin_unlock(&gl->gl_lockref.lock);
@@ -1162,7 +1192,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
atomic_inc(&sdp->sd_glock_disposal);
gl->gl_node.next = NULL;
- gl->gl_flags = 0;
+ gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0;
gl->gl_name = name;
lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
gl->gl_lockref.count = 1;
@@ -2326,6 +2356,10 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'P';
if (test_bit(GLF_FREEING, gflags))
*p++ = 'x';
+ if (test_bit(GLF_INSTANTIATE_NEEDED, gflags))
+ *p++ = 'n';
+ if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
+ *p++ = 'N';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a36104229d0d..4f8642301801 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -204,6 +204,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags,
extern void gfs2_holder_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq(struct gfs2_holder *gh);
extern int gfs2_glock_poll(struct gfs2_holder *gh);
+extern int gfs2_instantiate(struct gfs2_holder *gh);
extern int gfs2_glock_wait(struct gfs2_holder *gh);
extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq(struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8452a83bd55a..e2656baf38a3 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -357,6 +357,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
truncate_inode_pages(mapping, 0);
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
+ set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
forget_all_cached_acls(&ip->i_inode);
security_inode_invalidate_secctx(&ip->i_inode);
gfs2_dir_hash_inval(ip);
@@ -495,13 +496,13 @@ static int inode_go_instantiate(struct gfs2_holder *gh)
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
- if (!ip)
- return 0;
+ if (!ip) /* no inode to populate - read it in later */
+ goto out;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
if (error)
- return error;
+ goto out;
}
if (gh->gh_state != LM_ST_DEFERRED)
@@ -515,9 +516,10 @@ static int inode_go_instantiate(struct gfs2_holder *gh)
list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
- return 1;
+ error = 1;
}
+out:
return error;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index af632dc65231..19a4c6132c67 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -316,6 +316,7 @@ struct gfs2_alloc_parms {
enum {
GLF_LOCK = 1,
+ GLF_INSTANTIATE_NEEDED = 2, /* needs instantiate */
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
@@ -325,6 +326,7 @@ enum {
GLF_REPLY_PENDING = 9,
GLF_INITIAL = 10,
GLF_FROZEN = 11,
+ GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d8cd835c560a..940d3e37250d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -183,6 +183,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
glock_set_object(ip->i_gl, ip);
set_bit(GIF_INVALID, &ip->i_flags);
+ set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail;
@@ -196,7 +197,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (type == DT_UNKNOWN) {
/* Inode glock must be locked already */
- error = gfs2_inode_refresh(GFS2_I(inode));
+ error = gfs2_instantiate(&i_gh);
if (error)
goto fail;
} else {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cdcbc822064d..5ee7da3a450e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1238,8 +1238,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
rgd->rd_bits[0].bi_bh->b_data);
- }
- else if (sdp->sd_args.ar_rgrplvb) {
+ } else if (sdp->sd_args.ar_rgrplvb) {
if (!gfs2_rgrp_lvb_valid(rgd)){
gfs2_consist_rgrpd(rgd);
error = -EIO;
@@ -1257,11 +1256,10 @@ fail:
bi->bi_bh = NULL;
gfs2_assert_warn(sdp, !bi->bi_clone);
}
-
return error;
}
-static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
{
u32 rl_flags;
@@ -1269,7 +1267,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
return 0;
if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
- return gfs2_rgrp_bh_get(rgd);
+ return gfs2_instantiate(gh);
rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
rl_flags &= ~GFS2_RDF_MASK;
@@ -1312,6 +1310,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
bi->bi_bh = NULL;
}
}
+ set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags);
}
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
@@ -2110,7 +2109,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
- error = update_rgrp_lvb(rs->rs_rgd);
+ error = update_rgrp_lvb(rs->rs_rgd,
+ &ip->i_rgd_gh);
if (unlikely(error)) {
rgrp_unlock_local(rs->rs_rgd);
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
@@ -2125,8 +2125,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
(loops == 0 && target > rs->rs_rgd->rd_extfail_pt))
goto skip_rgrp;
- if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rgd);
+ if (sdp->sd_args.ar_rgrplvb) {
+ error = gfs2_instantiate(&ip->i_rgd_gh);
+ if (error)
+ goto skip_rgrp;
+ }
/* Get a reservation if we don't already have one */
if (!gfs2_rs_active(rs))
@@ -2762,8 +2765,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
void rgrp_lock_local(struct gfs2_rgrpd *rgd)
{
- GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) &&
- !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
mutex_lock(&rgd->rd_mutex);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6e00d15ef0a8..26c726580041 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1245,7 +1245,7 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
return SHOULD_NOT_DELETE_DINODE;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
- ret = gfs2_inode_refresh(ip);
+ ret = gfs2_instantiate(gh);
if (ret)
return SHOULD_NOT_DELETE_DINODE;
}