From badb55ec208adc4c406ed084f486deb1f9f5baa0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 23 Jan 2020 18:41:00 +0100 Subject: gfs2: Split gfs2_lm_withdraw into two functions Split gfs2_lm_withdraw into a function that prints an error message and a function that withdraws the filesystem. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/glops.c | 3 +- fs/gfs2/log.c | 19 +++++---- fs/gfs2/sys.c | 3 +- fs/gfs2/util.c | 124 +++++++++++++++++++++++++++++--------------------------- fs/gfs2/util.h | 3 +- 5 files changed, 82 insertions(+), 70 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 061d22e1ceb6..58431f67665e 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -39,7 +39,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n", gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); - gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n"); + gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n"); + gfs2_withdraw(gl->gl_name.ln_sbd); } /** diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 00a2e721a374..c4c7c013f7a7 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -165,7 +165,7 @@ restart: spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); if (withdraw) - gfs2_lm_withdraw(sdp, NULL); + gfs2_withdraw(sdp); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -239,8 +239,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); - if (withdraw) - gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); + if (withdraw) { + gfs2_lm(sdp, "fatal: I/O error(s)\n"); + gfs2_withdraw(sdp); + } return ret; } @@ -1016,11 +1018,12 @@ int gfs2_logd(void *data) /* Check for errors writing to the journal */ if (sdp->sd_log_error) { - gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: error %d: " - "withdrawing the file system to " - "prevent further damage.\n", - sdp->sd_fsname, sdp->sd_log_error); + gfs2_lm(sdp, + "GFS2: fsid=%s: error %d: " + "withdrawing the file system to " + "prevent further damage.\n", + sdp->sd_fsname, sdp->sd_log_error); + gfs2_withdraw(sdp); } did_flush = false; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 8ccb68f4ed16..a2eae5c578d6 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -136,7 +136,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) if (val != 1) return -EINVAL; - gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); + gfs2_lm(sdp, "withdrawing from cluster at user's request\n"); + gfs2_withdraw(sdp); return len; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index ec600b487498..322012e2064e 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -33,28 +33,31 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) fs_emerg(sdp, "fatal assertion failed\n"); } -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) +void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && + test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + fs_err(sdp, "%pV", &vaf); + va_end(args); +} + +int gfs2_withdraw(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; - va_list args; - struct va_format vaf; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return 0; - if (fmt) { - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - fs_err(sdp, "%pV", &vaf); - - va_end(args); - } - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); @@ -89,10 +92,12 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, const char *function, char *file, unsigned int line) { int me; - me = gfs2_lm_withdraw(sdp, - "fatal: assertion \"%s\" failed\n" - " function = %s, file = %s, line = %u\n", - assertion, function, file, line); + + gfs2_lm(sdp, + "fatal: assertion \"%s\" failed\n" + " function = %s, file = %s, line = %u\n", + assertion, function, file, line); + me = gfs2_withdraw(sdp); dump_stack(); return (me) ? -1 : -2; } @@ -140,11 +145,10 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, char *file, unsigned int line) { - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + function, file, line); + return gfs2_withdraw(sdp); } /** @@ -157,15 +161,15 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error\n" - " inode = %llu %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr, - function, file, line); - return rv; + + gfs2_lm(sdp, + "fatal: filesystem consistency error\n" + " inode = %llu %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, + function, file, line); + return gfs2_withdraw(sdp); } /** @@ -179,17 +183,16 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, { struct gfs2_sbd *sdp = rgd->rd_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; - int rv; sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); - rv = gfs2_lm_withdraw(sdp, - "fatal: filesystem consistency error\n" - " RG = %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)rgd->rd_addr, - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: filesystem consistency error\n" + " RG = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)rgd->rd_addr, + function, file, line); + return gfs2_withdraw(sdp); } /** @@ -203,12 +206,14 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, unsigned int line) { int me; - me = gfs2_lm_withdraw(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (%s)\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, - function, file, line); + + gfs2_lm(sdp, + "fatal: invalid metadata block\n" + " bh = %llu (%s)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, + function, file, line); + me = gfs2_withdraw(sdp); return (me) ? -1 : -2; } @@ -223,12 +228,14 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, char *file, unsigned int line) { int me; - me = gfs2_lm_withdraw(sdp, - "fatal: invalid metadata block\n" - " bh = %llu (type: exp=%u, found=%u)\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, type, t, - function, file, line); + + gfs2_lm(sdp, + "fatal: invalid metadata block\n" + " bh = %llu (type: exp=%u, found=%u)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, t, + function, file, line); + me = gfs2_withdraw(sdp); return (me) ? -1 : -2; } @@ -241,12 +248,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { - int rv; - rv = gfs2_lm_withdraw(sdp, - "fatal: I/O error\n" - " function = %s, file = %s, line = %u\n", - function, file, line); - return rv; + gfs2_lm(sdp, + "fatal: I/O error\n" + " function = %s, file = %s, line = %u\n", + function, file, line); + return gfs2_withdraw(sdp); } /** @@ -266,6 +272,6 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, (unsigned long long)bh->b_blocknr, function, file, line); if (withdraw) - gfs2_lm_withdraw(sdp, NULL); + gfs2_withdraw(sdp); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index f2702bc9837c..fdc218a28609 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -177,6 +177,7 @@ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) __printf(2, 3) -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); +void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...); +int gfs2_withdraw(struct gfs2_sbd *sdp); #endif /* __UTIL_DOT_H__ */ -- cgit v1.2.3 From 8dc88ac68df89851488a60b8f1582fe466f41a64 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 23 Jan 2020 19:19:38 +0100 Subject: gfs2: Report errors before withdraw In gfs2_rgrp_verify and compute_bitstructs, make sure to report errors before withdrawing the filesystem: otherwise, when we withdraw first and withdraw is configured to panic, we'll never get to the error reporting. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/rgrp.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e7bf91ec231c..2bdd662deff5 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -457,24 +457,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } if (count[0] != rgd->rd_free) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "free data mismatch: %u != %u\n", - count[0], rgd->rd_free); + gfs2_lm(sdp, "free data mismatch: %u != %u\n", + count[0], rgd->rd_free); + gfs2_consist_rgrpd(rgd); return; } tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; if (count[1] != tmp) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used data mismatch: %u != %u\n", - count[1], tmp); + gfs2_lm(sdp, "used data mismatch: %u != %u\n", + count[1], tmp); + gfs2_consist_rgrpd(rgd); return; } if (count[2] + count[3] != rgd->rd_dinodes) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used metadata mismatch: %u != %u\n", - count[2] + count[3], rgd->rd_dinodes); + gfs2_lm(sdp, "used metadata mismatch: %u != %u\n", + count[2] + count[3], rgd->rd_dinodes); + gfs2_consist_rgrpd(rgd); return; } } @@ -733,17 +733,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) } } -static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_sbd *sdp = rgd->rd_sbd; - - fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - fs_info(sdp, "ri_length = %u\n", rgd->rd_length); - fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - fs_info(sdp, "ri_data = %u\n", rgd->rd_data); - fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes); -} - /** * gfs2_compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor @@ -814,11 +803,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { - if (gfs2_consist_rgrpd(rgd)) { - gfs2_rindex_print(rgd); - fs_err(sdp, "start=%u len=%u offset=%u\n", - bi->bi_start, bi->bi_bytes, bi->bi_offset); - } + gfs2_lm(sdp, + "ri_addr = %llu\n" + "ri_length = %u\n" + "ri_data0 = %llu\n" + "ri_data = %u\n" + "ri_bitbytes = %u\n" + "start=%u len=%u offset=%u\n", + (unsigned long long)rgd->rd_addr, + rgd->rd_length, + (unsigned long long)rgd->rd_data0, + rgd->rd_data, + rgd->rd_bitbytes, + bi->bi_start, bi->bi_bytes, bi->bi_offset); + gfs2_consist_rgrpd(rgd); return -EIO; } -- cgit v1.2.3 From d7e7ab3f1e2283ef956bb94fee09e80dbc1c46e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 23 Jan 2020 19:25:05 +0100 Subject: gfs2: Remove usused cluster_wide arguments of gfs2_consist functions These arguments are always passed as 0, and they are never evaluated. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/util.c | 6 +++--- fs/gfs2/util.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 322012e2064e..74247d686cf7 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -142,7 +142,7 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, * 0 if it was already withdrawn */ -int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, +int gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line) { gfs2_lm(sdp, @@ -157,7 +157,7 @@ int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, * 0 if it was already withdrawn */ -int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, +int gfs2_consist_inode_i(struct gfs2_inode *ip, const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -178,7 +178,7 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, * 0 if it was already withdrawn */ -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = rgd->rd_sbd; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index fdc218a28609..2c2940d55a35 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -52,25 +52,25 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, __func__, __FILE__, __LINE__)) -int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, +int gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); #define gfs2_consist(sdp) \ -gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__) +gfs2_consist_i((sdp), __func__, __FILE__, __LINE__) -int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, +int gfs2_consist_inode_i(struct gfs2_inode *ip, const char *function, char *file, unsigned int line); #define gfs2_consist_inode(ip) \ -gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__) +gfs2_consist_inode_i((ip), __func__, __FILE__, __LINE__) -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, const char *function, char *file, unsigned int line); #define gfs2_consist_rgrpd(rgd) \ -gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__) +gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__) int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, -- cgit v1.2.3 From a5ca2f1cb66b1ef93bbd58b55b22cb50163a03db Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 23 Jan 2020 19:31:06 +0100 Subject: gfs2: Turn gfs2_consist into void functions Change the various gfs2_consist functions to return void. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/util.c | 24 +++++++++--------------- fs/gfs2/util.h | 12 ++++++------ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 74247d686cf7..698eb5952438 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -138,27 +138,23 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, /** * gfs2_consist_i - Flag a filesystem consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, - char *file, unsigned int line) +void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, + char *file, unsigned int line) { gfs2_lm(sdp, "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", function, file, line); - return gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } /** * gfs2_consist_inode_i - Flag an inode consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_inode_i(struct gfs2_inode *ip, - const char *function, char *file, unsigned int line) +void gfs2_consist_inode_i(struct gfs2_inode *ip, + const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -169,17 +165,15 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, function, file, line); - return gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } /** * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw - * Returns: -1 if this call withdrew the machine, - * 0 if it was already withdrawn */ -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, - const char *function, char *file, unsigned int line) +void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, + const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = rgd->rd_sbd; char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; @@ -192,7 +186,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, " function = %s, file = %s, line = %u\n", (unsigned long long)rgd->rd_addr, function, file, line); - return gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } /** diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 2c2940d55a35..93e089327216 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -52,22 +52,22 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, __func__, __FILE__, __LINE__)) -int gfs2_consist_i(struct gfs2_sbd *sdp, - const char *function, char *file, unsigned int line); +void gfs2_consist_i(struct gfs2_sbd *sdp, + const char *function, char *file, unsigned int line); #define gfs2_consist(sdp) \ gfs2_consist_i((sdp), __func__, __FILE__, __LINE__) -int gfs2_consist_inode_i(struct gfs2_inode *ip, - const char *function, char *file, unsigned int line); +void gfs2_consist_inode_i(struct gfs2_inode *ip, + const char *function, char *file, unsigned int line); #define gfs2_consist_inode(ip) \ gfs2_consist_inode_i((ip), __func__, __FILE__, __LINE__) -int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, - const char *function, char *file, unsigned int line); +void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, + const char *function, char *file, unsigned int line); #define gfs2_consist_rgrpd(rgd) \ gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__) -- cgit v1.2.3 From 8e28ef1f2fa176854f96fb0416f2aaf5516793d0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 23 Jan 2020 19:36:21 +0100 Subject: gfs2: Return bool from gfs2_assert functions The gfs2_assert functions only print messages when the filesystem hasn't been withdrawn yet, and they indicate whether or not they've printed something in their return value. However, none of the callers use that information, so simply return whether or not the assert has failed. (The gfs2_assert functions are still backwards; they return false when an assertion is true.) Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/util.c | 21 ++++++--------------- fs/gfs2/util.h | 28 ++++++++++++++++++---------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 698eb5952438..ec8e8c5ce848 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -84,37 +84,30 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) /** * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false - * Returns: -1 if this call withdrew the machine, - * -2 if it was already withdrawn */ -int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line) +void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) { - int me; - gfs2_lm(sdp, "fatal: assertion \"%s\" failed\n" " function = %s, file = %s, line = %u\n", assertion, function, file, line); - me = gfs2_withdraw(sdp); + gfs2_withdraw(sdp); dump_stack(); - return (me) ? -1 : -2; } /** * gfs2_assert_warn_i - Print a message to the console if @assertion is false - * Returns: -1 if we printed something - * -2 if we didn't */ -int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line) +void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) { if (time_before(jiffies, sdp->sd_last_warning + gfs2_tune_get(sdp, gt_complain_secs) * HZ)) - return -2; + return; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", @@ -132,8 +125,6 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, sdp->sd_fsname, function, file, line); sdp->sd_last_warning = jiffies; - - return -1; } /** diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 93e089327216..572399e75ce6 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -36,21 +36,29 @@ do { \ } while (0) -int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line); +void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line); #define gfs2_assert_withdraw(sdp, assertion) \ -((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__)) + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_withdraw_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__); \ + !_bool; \ + }) - -int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line); +void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line); #define gfs2_assert_warn(sdp, assertion) \ -((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__)) - + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_warn_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__); \ + !_bool; \ + }) void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); -- cgit v1.2.3 From 69511080bd6efd34f4e020fcde6cf73bb4a61af6 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 12 Feb 2019 13:43:55 -0700 Subject: gfs2: Introduce concept of a pending withdraw File system withdraws can be delayed when inconsistencies are discovered when we cannot withdraw immediately, for example, when critical spin_locks are held. But delaying the withdraw can cause gfs2 to ignore the error and keep running for a short period of time. For example, an rgrp glock may be dequeued and demoted while there are still buffers that haven't been properly revoked, due to io errors writing to the journal. This patch introduces a new concept of a pending withdraw, which means an inconsistency has been discovered and we need to withdraw at the earliest possible opportunity. In these cases, we aren't quite withdrawn yet, but we still need to not dequeue glocks and other critical things. If we dequeue the glocks and the withdraw results in our journal being replayed, the replay could overwrite data that's been modified by a different node that acquired the glock in the meantime. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 1 + fs/gfs2/log.c | 21 ++++++++------------- fs/gfs2/util.c | 14 +++++++------- fs/gfs2/util.h | 12 +++++++++++- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 9fd88ed18807..3cd2de3db40a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -618,6 +618,7 @@ enum { SDF_FORCE_AIL_FLUSH = 9, SDF_AIL1_IO_ERROR = 10, SDF_FS_FROZEN = 11, + SDF_WITHDRAWING = 12, /* Will withdraw eventually */ }; enum gfs2_freeze_state { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index c4c7c013f7a7..d1ab04135b2f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -88,8 +88,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, - struct gfs2_trans *tr, - bool *withdraw) + struct gfs2_trans *tr) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -108,7 +107,7 @@ __acquires(&sdp->sd_ail_lock) !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) { gfs2_io_error_bh(sdp, bh); - *withdraw = true; + gfs2_withdraw_delayed(sdp); } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); continue; @@ -149,7 +148,6 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; - bool withdraw = false; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); @@ -158,13 +156,12 @@ restart: list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw) && - !gfs2_withdrawn(sdp)) + if (gfs2_ail1_start_one(sdp, wbc, tr) && !gfs2_withdrawn(sdp)) goto restart; } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); - if (withdraw) + if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) gfs2_withdraw(sdp); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -193,8 +190,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) * */ -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - bool *withdraw) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -208,7 +204,7 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, if (!buffer_uptodate(bh) && !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) { gfs2_io_error_bh(sdp, bh); - *withdraw = true; + gfs2_withdraw_delayed(sdp); } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } @@ -226,11 +222,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) struct gfs2_trans *tr, *s; int oldest_tr = 1; int ret; - bool withdraw = false; spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { - gfs2_ail1_empty_one(sdp, tr, &withdraw); + gfs2_ail1_empty_one(sdp, tr); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else @@ -239,7 +234,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); - if (withdraw) { + if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) { gfs2_lm(sdp, "fatal: I/O error(s)\n"); gfs2_withdraw(sdp); } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index ec8e8c5ce848..47cd40de08b1 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -249,13 +249,13 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line, bool withdraw) { - if (!gfs2_withdrawn(sdp)) - fs_err(sdp, - "fatal: I/O error\n" - " block = %llu\n" - " function = %s, file = %s, line = %u\n", - (unsigned long long)bh->b_blocknr, - function, file, line); + if (gfs2_withdrawn(sdp)) + return; + + fs_err(sdp, "fatal: I/O error\n" + " block = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, function, file, line); if (withdraw) gfs2_withdraw(sdp); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 572399e75ce6..16b2cc6c4560 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -172,13 +172,23 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, return x; } +/** + * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks + * @sdp: the superblock + */ +static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) +{ + set_bit(SDF_WITHDRAWING, &sdp->sd_flags); +} + /** * gfs2_withdrawn - test whether the file system is withdrawing or withdrawn * @sdp: the superblock */ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) { - return test_bit(SDF_WITHDRAWN, &sdp->sd_flags); + return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || + test_bit(SDF_WITHDRAWING, &sdp->sd_flags); } #define gfs2_tune_get(sdp, field) \ -- cgit v1.2.3 From 30fe70a85a909a23dcbc2c628ca6655b2c85e7a1 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 11:47:09 -0600 Subject: gfs2: clear ail1 list when gfs2 withdraws This patch fixes a bug in which function gfs2_log_flush can get into an infinite loop when a gfs2 file system is withdrawn. The problem is the infinite loop "for (;;)" in gfs2_log_flush which would never finish because the io error and subsequent withdraw prevented the items from being taken off the ail list. This patch tries to clean up the mess by allowing withdraw situations to move not-in-flight buffer_heads to the ail2 list, where they will be dealt with later. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/log.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index d1ab04135b2f..9ebec6f93fa3 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -103,16 +103,22 @@ __acquires(&sdp->sd_ail_lock) gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh) && - !test_and_set_bit(SDF_AIL1_IO_ERROR, + if (buffer_uptodate(bh)) { + list_move(&bd->bd_ail_st_list, + &tr->tr_ail2_list); + continue; + } + if (!test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) { gfs2_io_error_bh(sdp, bh); gfs2_withdraw_delayed(sdp); } - list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); - continue; } + if (gfs2_withdrawn(sdp)) { + gfs2_remove_from_ail(bd); + continue; + } if (!buffer_dirty(bh)) continue; if (gl == bd->bd_gl) @@ -859,6 +865,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (gfs2_ail1_empty(sdp)) break; } + if (gfs2_withdrawn(sdp)) + goto out; atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); @@ -871,6 +879,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } +out: trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); -- cgit v1.2.3 From b3422cacdd7e623e473b4c3977f3ee65e1fed62f Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 11:50:30 -0600 Subject: gfs2: Rework how rgrp buffer_heads are managed Before this patch, the rgrp code had a serious problem related to how it managed buffer_heads for resource groups. The problem caused file system corruption, especially in cases of journal replay. When an rgrp glock was demoted to transfer ownership to a different cluster node, do_xmote() first calls rgrp_go_sync and then rgrp_go_inval, as expected. When it calls rgrp_go_sync, that called gfs2_rgrp_brelse() that dropped the buffer_head reference count. In most cases, the reference count went to zero, which is right. However, there were other places where the buffers are handled differently. After rgrp_go_sync, do_xmote called rgrp_go_inval which called gfs2_rgrp_brelse a second time, then rgrp_go_inval's call to truncate_inode_pages_range would get rid of the pages in memory, but only if the reference count drops to 0. Unfortunately, gfs2_rgrp_brelse was setting bi->bi_bh = NULL. So when rgrp_go_sync called gfs2_rgrp_brelse, it lost the pointer to the buffer_heads in cases where the reference count was still 1. Therefore, when rgrp_go_inval called gfs2_rgrp_brelse a second time, it failed the check for "if (bi->bi_bh)" and thus failed to call brelse a second time. Because of that, the reference count on those buffers sometimes failed to drop from 1 to 0. And that caused function truncate_inode_pages_range to keep the pages in page cache rather than freeing them. The next time the rgrp glock was acquired, the metadata read of the rgrp buffers re-used the pages in memory, which were now wrong because they were likely modified by the other node who acquired the glock in EX (which is why we demoted the glock). This re-use of the page cache caused corruption because changes made by the other nodes were never seen, so the bitmaps were inaccurate. For some reason, the problem became most apparent when journal replay forced the replay of rgrps in memory, which caused newer rgrp data to be overwritten by the older in-core pages. A big part of the problem was that the rgrp buffer were released in multiple places: The go_unlock function would release them when the glock was released rather than when the glock is demoted, which is clearly wrong because our intent was to cache them until the glock is demoted from SH or EX. This patch attempts to clean up the mess and make one consistent and centralized mechanism for managing the rgrp buffer_heads by implementing several changes: 1. It eliminates the call to gfs2_rgrp_brelse() from rgrp_go_sync. We don't want to release the buffers or zero the pointers when syncing for the reasons stated above. It only makes sense to release them when the glock is actually invalidated (go_inval). And when we do, then we set the bh pointers to NULL. 2. The go_unlock function (which was only used for rgrps) is eliminated, as we've talked about doing many times before. The go_unlock function was called too early in the glock dq process, and should not happen until the glock is invalidated. 3. It also eliminates the call to rgrp_brelse in gfs2_clear_rgrpd. That will now happen automatically when the rgrp glocks are demoted, and shouldn't happen any sooner or later than that. Instead, function gfs2_clear_rgrpd has been modified to demote the rgrp glocks, and therefore, free those pages, before the remaining glocks are culled by gfs2_gl_hash_clear. This prevents the gl_object from hanging around when the glocks are culled. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 8 -------- fs/gfs2/glops.c | 9 +-------- fs/gfs2/incore.h | 1 - fs/gfs2/rgrp.c | 23 +++++------------------ fs/gfs2/rgrp.h | 1 - 5 files changed, 6 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d0eceaff3cea..1cb471a8bc87 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1241,7 +1241,6 @@ int gfs2_glock_poll(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned delay = 0; int fast_path = 0; @@ -1252,13 +1251,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) list_del_init(&gh->gh_list); clear_bit(HIF_HOLDER, &gh->gh_iflags); if (find_first_holder(gl) == NULL) { - if (glops->go_unlock) { - GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); - spin_unlock(&gl->gl_lockref.lock); - glops->go_unlock(gh); - spin_lock(&gl->gl_lockref.lock); - clear_bit(GLF_LOCK, &gl->gl_flags); - } if (list_empty(&gl->gl_holders) && !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && !test_bit(GLF_DEMOTE, &gl->gl_flags)) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 58431f67665e..1c557457c753 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -145,15 +145,9 @@ static void rgrp_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; - struct gfs2_rgrpd *rgd; + struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); int error; - spin_lock(&gl->gl_lockref.lock); - rgd = gl->gl_object; - if (rgd) - gfs2_rgrp_brelse(rgd); - spin_unlock(&gl->gl_lockref.lock); - if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); @@ -601,7 +595,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_sync = rgrp_go_sync, .go_inval = rgrp_go_inval, .go_lock = gfs2_rgrp_go_lock, - .go_unlock = gfs2_rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_flags = GLOF_LVB, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 3cd2de3db40a..b95c8a31d309 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -239,7 +239,6 @@ struct gfs2_glock_operations { void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); - void (*go_unlock) (struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 2bdd662deff5..2ee2f7d48bc1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -720,8 +720,12 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - glock_clear_object(gl, rgd); + if (gl->gl_state != LM_ST_UNLOCKED) { + gfs2_glock_cb(gl, LM_ST_UNLOCKED); + flush_delayed_work(&gl->gl_work); + } gfs2_rgrp_brelse(rgd); + glock_clear_object(gl, rgd); gfs2_glock_put(gl); } @@ -1284,23 +1288,6 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } - -} - -/** - * gfs2_rgrp_go_unlock - Unlock a rgrp glock - * @gh: The glock holder for the resource group - * - */ - -void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) | - test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags); - - if (rgd && demote_requested) - gfs2_rgrp_brelse(rgd); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index c14a673ae36f..a584f3096418 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -33,7 +33,6 @@ extern int gfs2_rindex_update(struct gfs2_sbd *sdp); extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); -extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); -- cgit v1.2.3 From 036330c914365f449ead353ef152fb29411cd4cb Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 10 Apr 2019 11:46:35 -0600 Subject: gfs2: log error reform Before this patch, gfs2 kept track of journal io errors in two places sd_log_error and the SDF_AIL1_IO_ERROR flag in sd_flags. This patch consolidates the two into sd_log_error so that it reflects the first error encountered writing to the journal. In future patches, we will take advantage of this by checking this value rather than having to check both when reacting to io errors. In addition, this fixes a tight loop in unmount: If buffers get on the ail1 list and an io error occurs elsewhere, the ail1 list would never be cleared because they were always busy. So unmount would hang, waiting for the ail1 list to empty. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 7 +++---- fs/gfs2/log.c | 16 ++++++++++++---- fs/gfs2/quota.c | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b95c8a31d309..ab89f746b3b6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -615,9 +615,8 @@ enum { SDF_RORECOVERY = 7, /* read only recovery */ SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, - SDF_AIL1_IO_ERROR = 10, - SDF_FS_FROZEN = 11, - SDF_WITHDRAWING = 12, /* Will withdraw eventually */ + SDF_FS_FROZEN = 10, + SDF_WITHDRAWING = 11, /* Will withdraw eventually */ }; enum gfs2_freeze_state { @@ -828,7 +827,7 @@ struct gfs2_sbd { atomic_t sd_log_in_flight; struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; - int sd_log_error; + int sd_log_error; /* First log error */ atomic_t sd_reserving_log; wait_queue_head_t sd_reserving_log_wait; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9ebec6f93fa3..584bb7ce15bf 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -108,8 +108,7 @@ __acquires(&sdp->sd_ail_lock) &tr->tr_ail2_list); continue; } - if (!test_and_set_bit(SDF_AIL1_IO_ERROR, - &sdp->sd_flags)) { + if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); gfs2_withdraw_delayed(sdp); } @@ -205,10 +204,19 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) bd_ail_st_list) { bh = bd->bd_bh; gfs2_assert(sdp, bd->bd_tr == tr); - if (buffer_busy(bh)) + /* + * If another process flagged an io error, e.g. writing to the + * journal, error all other bhs and move them off the ail1 to + * prevent a tight loop when unmount tries to flush ail1, + * regardless of whether they're still busy. If no outside + * errors were found and the buffer is busy, move to the next. + * If the ail buffer is not busy and caught an error, flag it + * for others. + */ + if (!sdp->sd_log_error && buffer_busy(bh)) continue; if (!buffer_uptodate(bh) && - !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) { + !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); gfs2_withdraw_delayed(sdp); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e9f93045eb01..ca2194cfa38e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1477,7 +1477,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) return; if (!gfs2_withdrawn(sdp)) { fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); - sdp->sd_log_error = error; + cmpxchg(&sdp->sd_log_error, 0, error); wake_up(&sdp->sd_logd_waitq); } } -- cgit v1.2.3 From f34a6135ce723cf7940729ab0b2607a753ebb580 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 16 Apr 2019 12:23:28 -0600 Subject: gfs2: Only complain the first time an io error occurs in quota or log Before this patch, all io errors received by the quota daemon or the logd daemon would cause a complaint message to be issued, such as: gfs2: fsid=dm-13.0: Error 10 writing to journal, jid=0 This patch changes it so that the error message is only issued the first time the error is encountered. Also, before this patch function gfs2_end_log_write did not set the sd_log_error value, so log errors would not cause the file system to be withdrawn. This patch sets the error code so the file system is properly withdrawn if an io error is encountered writing to the journal. WARNING: This change in function breaks check xfstests generic/441 and causes it to fail: io errors writing to the log should cause a file system to be withdrawn, and no further operations are tolerated. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 5 +++-- fs/gfs2/quota.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index c090d5ad3f22..0af2e5ff0d97 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -203,8 +203,9 @@ static void gfs2_end_log_write(struct bio *bio) struct bvec_iter_all iter_all; if (bio->bi_status) { - fs_err(sdp, "Error %d writing to journal, jid=%u\n", - bio->bi_status, sdp->sd_jdesc->jd_jid); + if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status)) + fs_err(sdp, "Error %d writing to journal, jid=%u\n", + bio->bi_status, sdp->sd_jdesc->jd_jid); wake_up(&sdp->sd_logd_waitq); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ca2194cfa38e..dbe87b2b55af 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1476,8 +1476,8 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) if (error == 0 || error == -EROFS) return; if (!gfs2_withdrawn(sdp)) { - fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); - cmpxchg(&sdp->sd_log_error, 0, error); + if (!cmpxchg(&sdp->sd_log_error, 0, error)) + fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); wake_up(&sdp->sd_logd_waitq); } } -- cgit v1.2.3 From 03678a99d13831b938fdc9d81b5a7608fc6ef416 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 12 Feb 2019 13:58:40 -0700 Subject: gfs2: Ignore dlm recovery requests if gfs2 is withdrawn When a node fails, user space informs dlm of the node failure, and dlm instructs gfs2 on the surviving nodes to perform journal recovery. It does this by calling various callback functions in lock_dlm.c. To mark its progress, it keeps generation numbers and recover bits in a dlm "control" lock lvb, which is seen by all nodes to determine which journals need to be replayed. The gfs2 on all nodes get the same recovery requests from dlm, so they all try to do the recovery, but only one will be granted the exclusive lock on the journal. The others fail with a "Busy" message on their "try lock." However, when a node is withdrawn, it cannot safely do any recovery or replay any journals. To make matters worse, gfs2 might withdraw as a result of attempting recovery. For example, this might happen if the device goes offline, or if an hba fails. But in today's gfs2 code, it doesn't check for being withdrawn at any step in the recovery process. What's worse is that these callbacks from dlm have no return code, so there is no way to indicate failure back to dlm. We can send a "Recovery failed" uevent eventually, but that tells user space what happened, not dlm's kernel code. Before this patch, lock_dlm would perform its recovery steps but ignore the result, and eventually it would still update its generation number in the lvb, despite the fact that it may have withdrawn or encountered an error. The other nodes would then see the newer generation number in the lvb and conclude that they don't need to do recovery because the generation number is newer than the last one they saw. They think a different node has already recovered the journal. This patch adds checks to several of the callbacks used by dlm in its recovery state machine so that the functions are ignored and skipped if an io error has occurred or if the file system is withdrawn. That prevents the lvb bits from being updated, and therefore dlm and user space still see the need for recovery to take place. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/lock_dlm.c | 18 ++++++++++++++++++ fs/gfs2/recovery.c | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 7c7197343ee2..57fdf53d2246 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1079,6 +1079,10 @@ static void gdlm_recover_prep(void *arg) struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_prep ignored due to withdraw.\n"); + return; + } spin_lock(&ls->ls_recover_spin); ls->ls_recover_block = ls->ls_recover_start; set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); @@ -1101,6 +1105,11 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int jid = slot->slot - 1; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n", + jid); + return; + } spin_lock(&ls->ls_recover_spin); if (ls->ls_recover_size < jid + 1) { fs_err(sdp, "recover_slot jid %d gen %u short size %d\n", @@ -1125,6 +1134,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recover_done ignored due to withdraw.\n"); + return; + } /* ensure the ls jid arrays are large enough */ set_recover_size(sdp, slots, num_slots); @@ -1152,6 +1165,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, { struct lm_lockstruct *ls = &sdp->sd_lockstruct; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n", + jid); + return; + } if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags)) return; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 85f830e56945..8cc26bef4e64 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -305,6 +305,11 @@ void gfs2_recover_func(struct work_struct *work) int error = 0; int jlocked = 0; + if (gfs2_withdrawn(sdp)) { + fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n", + jd->jd_jid); + goto fail; + } t_start = ktime_get(); if (sdp->sd_args.ar_spectator) goto fail; -- cgit v1.2.3 From 0d91061a372671aec1af729686ad9241a59fc328 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 18 Feb 2019 08:37:25 -0700 Subject: gfs2: move check_journal_clean to util.c for future use Before this patch function check_journal_clean was in ops_fstype.c. This patch moves it to util.c so we can make use of it elsewhere in a future patch. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 42 ------------------------------------------ fs/gfs2/util.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/util.h | 1 + 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b3e904bcc02c..d19ee57c99ce 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -600,48 +600,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) return error; } -/** - * check_journal_clean - Make sure a journal is clean for a spectator mount - * @sdp: The GFS2 superblock - * @jd: The journal descriptor - * - * Returns: 0 if the journal is clean or locked, else an error - */ -static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) -{ - int error; - struct gfs2_holder j_gh; - struct gfs2_log_header_host head; - struct gfs2_inode *ip; - - ip = GFS2_I(jd->jd_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | - GL_EXACT | GL_NOCACHE, &j_gh); - if (error) { - fs_err(sdp, "Error locking journal for spectator mount.\n"); - return -EPERM; - } - error = gfs2_jdesc_check(jd); - if (error) { - fs_err(sdp, "Error checking journal for spectator mount.\n"); - goto out_unlock; - } - error = gfs2_find_jhead(jd, &head, false); - if (error) { - fs_err(sdp, "Error parsing journal for spectator mount.\n"); - goto out_unlock; - } - if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - error = -EPERM; - fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " - "must not be a spectator.\n", jd->jd_jid); - } - -out_unlock: - gfs2_glock_dq_uninit(&j_gh); - return error; -} - static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = d_inode(sdp->sd_master_dir); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 47cd40de08b1..86965e6089c6 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -16,7 +16,10 @@ #include "gfs2.h" #include "incore.h" #include "glock.h" +#include "lops.h" +#include "recovery.h" #include "rgrp.h" +#include "super.h" #include "util.h" struct kmem_cache *gfs2_glock_cachep __read_mostly; @@ -33,6 +36,48 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) fs_emerg(sdp, "fatal assertion failed\n"); } +/** + * check_journal_clean - Make sure a journal is clean for a spectator mount + * @sdp: The GFS2 superblock + * @jd: The journal descriptor + * + * Returns: 0 if the journal is clean or locked, else an error + */ +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + int error; + struct gfs2_holder j_gh; + struct gfs2_log_header_host head; + struct gfs2_inode *ip; + + ip = GFS2_I(jd->jd_inode); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + GL_EXACT | GL_NOCACHE, &j_gh); + if (error) { + fs_err(sdp, "Error locking journal for spectator mount.\n"); + return -EPERM; + } + error = gfs2_jdesc_check(jd); + if (error) { + fs_err(sdp, "Error checking journal for spectator mount.\n"); + goto out_unlock; + } + error = gfs2_find_jhead(jd, &head, false); + if (error) { + fs_err(sdp, "Error parsing journal for spectator mount.\n"); + goto out_unlock; + } + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EPERM; + fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " + "must not be a spectator.\n", jd->jd_jid); + } + +out_unlock: + gfs2_glock_dq_uninit(&j_gh); + return error; +} + void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) { struct va_format vaf; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 16b2cc6c4560..cf613497a20e 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -136,6 +136,7 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); -- cgit v1.2.3 From a72d2401f54b7db41c77ab971238a06eafe929fb Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 13 Jun 2019 13:28:45 -0500 Subject: gfs2: Allow some glocks to be used during withdraw We need to allow some glocks to be enqueued, dequeued, promoted, and demoted when we're withdrawn. For example, to maintain metadata integrity, we should disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like iopen or the transaction glocks may be safely used because none of their metadata goes through the journal. So in general, we should disallow all glocks with an address space, and allow all the others. One exception is: we need to allow our active journal to be demoted so others may recover it. Allowing glocks after withdraw gives us the ability to take appropriate action (in a following patch) to have our journal properly replayed by another node rather than just abandoning the current transactions and pretending nothing bad happened, leaving the other nodes free to modify the blocks we had in our journal, which may result in file system corruption. Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 33 +++++++++++++++++++++++++++++---- fs/gfs2/glops.c | 10 +++++++--- fs/gfs2/incore.h | 8 +++++--- fs/gfs2/ops_fstype.c | 4 ++++ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1cb471a8bc87..454d94dd8933 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -133,6 +133,33 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) } } +/** + * glock_blocked_by_withdraw - determine if we can still use a glock + * @gl: the glock + * + * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted + * when we're withdrawn. For example, to maintain metadata integrity, we should + * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like + * iopen or the transaction glocks may be safely used because none of their + * metadata goes through the journal. So in general, we should disallow all + * glocks that are journaled, and allow all the others. One exception is: + * we need to allow our active journal to be promoted and demoted so others + * may recover it and we can reacquire it when they're done. + */ +static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + if (likely(!gfs2_withdrawn(sdp))) + return false; + if (gl->gl_ops->go_flags & GLOF_NONDISK) + return false; + if (!sdp->sd_jdesc || + gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) + return false; + return true; +} + void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -549,8 +576,7 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; - if (unlikely(gfs2_withdrawn(sdp)) && - target != LM_ST_UNLOCKED) + if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl)) return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); @@ -1194,10 +1220,9 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; - if (unlikely(gfs2_withdrawn(sdp))) + if (glock_blocked_by_withdraw(gl)) return -EIO; if (test_bit(GLF_LRU, &gl->gl_flags)) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1c557457c753..3553ca939337 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -579,6 +579,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_inode_glops = { @@ -605,30 +606,33 @@ const struct gfs2_glock_operations gfs2_freeze_glops = { .go_xmote_bh = freeze_go_xmote_bh, .go_demote_ok = freeze_go_demote_ok, .go_type = LM_TYPE_NONDISK, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, - .go_flags = GLOF_LRU, + .go_flags = GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_flock_glops = { .go_type = LM_TYPE_FLOCK, - .go_flags = GLOF_LRU, + .go_flags = GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, - .go_flags = GLOF_LVB | GLOF_LRU, + .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_journal_glops = { .go_type = LM_TYPE_JOURNAL, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations *gfs2_glops_list[] = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ab89f746b3b6..3072707aff7a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -244,9 +244,10 @@ struct gfs2_glock_operations { void (*go_callback)(struct gfs2_glock *gl, bool remote); const int go_type; const unsigned long go_flags; -#define GLOF_ASPACE 1 -#define GLOF_LVB 2 -#define GLOF_LRU 4 +#define GLOF_ASPACE 1 /* address space attached */ +#define GLOF_LVB 2 /* Lock Value Block attached */ +#define GLOF_LRU 4 /* LRU managed */ +#define GLOF_NONDISK 8 /* not I/O related */ }; enum { @@ -541,6 +542,7 @@ struct gfs2_jdesc { struct list_head jd_revoke_list; unsigned int jd_replay_tail; + u64 jd_no_addr; }; struct gfs2_statfs_change_host { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index d19ee57c99ce..74389d856dd3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -552,6 +552,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { + struct gfs2_inode *jip; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -591,6 +593,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) spin_lock(&sdp->sd_jindex_spin); jd->jd_jid = sdp->sd_journals++; + jip = GFS2_I(jd->jd_inode); + jd->jd_no_addr = jip->i_no_addr; list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); spin_unlock(&sdp->sd_jindex_spin); } -- cgit v1.2.3 From 601ef0d52e9617588fcff3df26953592f2eb44ac Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 28 Jan 2020 20:23:45 +0100 Subject: gfs2: Force withdraw to replay journals and wait for it to finish When a node withdraws from a file system, it often leaves its journal in an incomplete state. This is especially true when the withdraw is caused by io errors writing to the journal. Before this patch, a withdraw would try to write a "shutdown" record to the journal, tell dlm it's done with the file system, and none of the other nodes know about the problem. Later, when the problem is fixed and the withdrawn node is rebooted, it would then discover that its own journal was incomplete, and replay it. However, replaying it at this point is almost guaranteed to introduce corruption because the other nodes are likely to have used affected resource groups that appeared in the journal since the time of the withdraw. Replaying the journal later will overwrite any changes made, and not through any fault of dlm, which was instructed during the withdraw to release those resources. This patch makes file system withdraws seen by the entire cluster. Withdrawing nodes dequeue their journal glock to allow recovery. The remaining nodes check all the journals to see if they are clean or in need of replay. They try to replay dirty journals, but only the journals of withdrawn nodes will be "not busy" and therefore available for replay. Until the journal replay is complete, no i/o related glocks may be given out, to ensure that the replay does not cause the aforementioned corruption: We cannot allow any journal replay to overwrite blocks associated with a glock once it is held. The "live" glock which is now used to signal when a withdraw occurs. When a withdraw occurs, the node signals its withdraw by dequeueing the "live" glock and trying to enqueue it in EX mode, thus forcing the other nodes to all see a demote request, by way of a "1CB" (one callback) try lock. The "live" glock is not granted in EX; the callback is only just used to indicate a withdraw has occurred. Note that all nodes in the cluster must wait for the recovering node to finish replaying the withdrawing node's journal before continuing. To this end, it checks that the journals are clean multiple times in a retry loop. Also note that the withdraw function may be called from a wide variety of situations, and therefore, we need to take extra precautions to make sure pointers are valid before using them in many circumstances. We also need to take care when glocks decide to withdraw, since the withdraw code now uses glocks. Also, before this patch, if a process encountered an error and decided to withdraw, if another process was already withdrawing, the second withdraw would be silently ignored, which set it free to unlock its glocks. That's correct behavior if the original withdrawer encounters further errors down the road. But if secondary waiters don't wait for the journal replay, unlocking glocks will allow other nodes to use them, despite the fact that the journal containing those blocks is being replayed. The replay needs to finish before our glocks are released to other nodes. IOW, secondary withdraws need to wait for the first withdraw to finish. For example, if an rgrp glock is unlocked by a process that didn't wait for the first withdraw, a journal replay could introduce file system corruption by replaying a rgrp block that has already been granted to a different cluster node. Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 23 ++++++- fs/gfs2/glops.c | 77 +++++++++++++++++++++- fs/gfs2/incore.h | 9 +++ fs/gfs2/lock_dlm.c | 34 ++++++++++ fs/gfs2/meta_io.c | 3 +- fs/gfs2/ops_fstype.c | 11 +++- fs/gfs2/quota.c | 3 + fs/gfs2/super.c | 75 +++++++++++++++------ fs/gfs2/super.h | 1 - fs/gfs2/sys.c | 2 + fs/gfs2/util.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++- 11 files changed, 390 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 454d94dd8933..7602d0e2492c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -271,7 +271,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_unlock(&gl->gl_lockref.lock); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp)); trace_gfs2_glock_put(gl); sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } @@ -576,7 +576,8 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; - if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl)) + if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && + gh && !(gh->gh_flags & LM_FLAG_NOEXP)) return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); @@ -1222,7 +1223,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; int error = 0; - if (glock_blocked_by_withdraw(gl)) + if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) return -EIO; if (test_bit(GLF_LRU, &gl->gl_flags)) @@ -1266,10 +1267,26 @@ int gfs2_glock_poll(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned delay = 0; int fast_path = 0; spin_lock(&gl->gl_lockref.lock); + /* + * If we're in the process of file system withdraw, we cannot just + * dequeue any glocks until our journal is recovered, lest we + * introduce file system corruption. We need two exceptions to this + * rule: We need to allow unlocking of nondisk glocks and the glock + * for our own journal that needs recovery. + */ + if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && + glock_blocked_by_withdraw(gl) && + gh->gh_gl != sdp->sd_jinode_gl) { + sdp->sd_glock_dqs_held++; + might_sleep(); + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, + TASK_UNINTERRUPTIBLE); + } if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3553ca939337..7cfacbe35e59 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -29,6 +29,8 @@ struct workqueue_struct *gfs2_freeze_wq; +extern struct workqueue_struct *gfs2_control_wq; + static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { fs_err(gl->gl_name.ln_sbd, @@ -496,13 +498,17 @@ static void freeze_go_sync(struct gfs2_glock *gl) int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (gl->gl_state == LM_ST_SHARED && + if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); + if (gfs2_withdrawn(sdp)) { + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + return; + } gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); @@ -577,6 +583,73 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) } } +/** + * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it + * @gl: glock being freed + * + * For now, this is only used for the journal inode glock. In withdraw + * situations, we need to wait for the glock to be freed so that we know + * other nodes may proceed with recovery / journal replay. + */ +static void inode_go_free(struct gfs2_glock *gl) +{ + /* Note that we cannot reference gl_object because it's already set + * to NULL by this point in its lifecycle. */ + if (!test_bit(GLF_FREEING, &gl->gl_flags)) + return; + clear_bit_unlock(GLF_FREEING, &gl->gl_flags); + wake_up_bit(&gl->gl_flags, GLF_FREEING); +} + +/** + * nondisk_go_callback - used to signal when a node did a withdraw + * @gl: the nondisk glock + * @remote: true if this came from a different cluster node + * + */ +static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + /* Ignore the callback unless it's from another node, and it's the + live lock. */ + if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) + return; + + /* First order of business is to cancel the demote request. We don't + * really want to demote a nondisk glock. At best it's just to inform + * us of another node's withdraw. We'll keep it in SH mode. */ + clear_bit(GLF_DEMOTE, &gl->gl_flags); + clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); + + /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || + test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || + test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) + return; + + /* We only care when a node wants us to unlock, because that means + * they want a journal recovered. */ + if (gl->gl_demote_state != LM_ST_UNLOCKED) + return; + + if (sdp->sd_args.ar_spectator) { + fs_warn(sdp, "Spectator node cannot recover journals.\n"); + return; + } + + fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n"); + set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); + /* + * We can't call remote_withdraw directly here or gfs2_recover_journal + * because this is called from the glock unlock function and the + * remote_withdraw needs to enqueue and dequeue the same "live" glock + * we were called from. So we queue it to the control work queue in + * lock_dlm. + */ + queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); +} + const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, .go_flags = GLOF_NONDISK, @@ -590,6 +663,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU, + .go_free = inode_go_free, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { @@ -623,6 +697,7 @@ const struct gfs2_glock_operations gfs2_flock_glops = { const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, .go_flags = GLOF_NONDISK, + .go_callback = nondisk_go_callback, }; const struct gfs2_glock_operations gfs2_quota_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 3072707aff7a..8cd564bcf5e6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -242,6 +242,7 @@ struct gfs2_glock_operations { void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); + void (*go_free)(struct gfs2_glock *gl); const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ @@ -343,6 +344,7 @@ enum { GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, GLF_INODE_CREATING = 16, /* Inode creation occurring */ + GLF_FREEING = 18, /* Wait for glock to be freed */ }; struct gfs2_glock { @@ -619,6 +621,10 @@ enum { SDF_FORCE_AIL_FLUSH = 9, SDF_FS_FROZEN = 10, SDF_WITHDRAWING = 11, /* Will withdraw eventually */ + SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ + SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ + SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are + withdrawing */ }; enum gfs2_freeze_state { @@ -769,6 +775,7 @@ struct gfs2_sbd { struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; + struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct gfs2_holder sd_qc_gh; @@ -830,6 +837,7 @@ struct gfs2_sbd { struct bio *sd_log_bio; wait_queue_head_t sd_log_flush_wait; int sd_log_error; /* First log error */ + wait_queue_head_t sd_withdraw_wait; atomic_t sd_reserving_log; wait_queue_head_t sd_reserving_log_wait; @@ -853,6 +861,7 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ + unsigned long sd_glock_dqs_held; }; static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 57fdf53d2246..9f2b5609f225 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -16,6 +16,8 @@ #include "incore.h" #include "glock.h" +#include "glops.h" +#include "recovery.h" #include "util.h" #include "sys.h" #include "trace_gfs2.h" @@ -124,6 +126,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ + if (gl->gl_ops->go_free) + gl->gl_ops->go_free(gl); gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ @@ -323,6 +327,7 @@ static void gdlm_cancel(struct gfs2_glock *gl) /* * dlm/gfs2 recovery coordination using dlm_recover callbacks * + * 0. gfs2 checks for another cluster node withdraw, needing journal replay * 1. dlm_controld sees lockspace members change * 2. dlm_controld blocks dlm-kernel locking activity * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) @@ -571,6 +576,28 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) &ls->ls_control_lksb, "control_lock"); } +/** + * remote_withdraw - react to a node withdrawing from the file system + * @sdp: The superblock + */ +static void remote_withdraw(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd; + int ret = 0, count = 0; + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) + continue; + ret = gfs2_recover_journal(jd, true); + if (ret) + break; + count++; + } + + /* Now drop the additional reference we acquired */ + fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret); +} + static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); @@ -581,6 +608,13 @@ static void gfs2_control_func(struct work_struct *work) int recover_size; int i, error; + /* First check for other nodes that may have done a withdraw. */ + if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) { + remote_withdraw(sdp); + clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); + return; + } + spin_lock(&ls->ls_recover_spin); /* * No MOUNT_DONE means we're still mounting; control_mount() diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0c3772974030..4b72abcf83b2 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -251,7 +251,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (unlikely(gfs2_withdrawn(sdp))) { + if (unlikely(gfs2_withdrawn(sdp)) && + (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) { *bhp = NULL; return -EIO; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 74389d856dd3..70d2abd000d8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -656,7 +656,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, - LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, + LM_ST_EXCLUSIVE, + LM_FLAG_NOEXP | GL_NOCACHE, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -664,6 +665,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) } ip = GFS2_I(sdp->sd_jdesc->jd_inode); + sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, &sdp->sd_jinode_gh); @@ -724,10 +726,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) return 0; fail_jinode_gh: - if (!sdp->sd_args.ar_spectator) + /* A withdraw may have done dq/uninit so now we need to check it */ + if (!sdp->sd_args.ar_spectator && + gfs2_holder_initialized(&sdp->sd_jinode_gh)) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); fail_journal_gh: - if (!sdp->sd_args.ar_spectator) + if (!sdp->sd_args.ar_spectator && + gfs2_holder_initialized(&sdp->sd_journal_gh)) gfs2_glock_dq_uninit(&sdp->sd_journal_gh); fail_jindex: gfs2_jindex_free(sdp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index dbe87b2b55af..43ffe5997098 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1541,6 +1541,8 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { + if (gfs2_withdrawn(sdp)) + goto bypass; /* Update the master statfs file */ if (sdp->sd_statfs_force_sync) { int error = gfs2_statfs_sync(sdp->sd_vfs, 0); @@ -1561,6 +1563,7 @@ int gfs2_quotad(void *data) try_to_freeze(); +bypass: t = min(quotad_timeo, statfs_timeo); prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 68cc7c291a81..693c6d13473c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -61,11 +61,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) sdp->sd_journals = 0; spin_unlock(&sdp->sd_jindex_spin); + sdp->sd_jdesc = NULL; while (!list_empty(&list)) { jd = list_entry(list.next, struct gfs2_jdesc, jd_list); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); + jd->jd_inode = NULL; kfree(jd); } } @@ -171,9 +173,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) goto fail_threads; j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); + if (gfs2_withdrawn(sdp)) { + error = -EIO; + goto fail; + } error = gfs2_find_jhead(sdp->sd_jdesc, &head, false); - if (error) + if (error || gfs2_withdrawn(sdp)) goto fail; if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { @@ -187,7 +193,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) gfs2_log_pointers_init(sdp, head.lh_blkno); error = gfs2_quota_init(sdp); - if (error) + if (error || gfs2_withdrawn(sdp)) goto fail; set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); @@ -599,34 +605,63 @@ out: int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { struct gfs2_holder freeze_gh; - int error; - - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, - &freeze_gh); - if (error && !gfs2_withdrawn(sdp)) - return error; + int error = 0; + int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + gfs2_holder_mark_uninitialized(&freeze_gh); + if (sdp->sd_freeze_gl && + !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { + if (!log_write_allowed) { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, GL_NOCACHE | + LM_FLAG_TRY, &freeze_gh); + if (error == GLR_TRYFAILED) + error = 0; + } else { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, GL_NOCACHE, + &freeze_gh); + if (error && !gfs2_withdrawn(sdp)) + return error; + } + } flush_workqueue(gfs2_delete_workqueue); - if (sdp->sd_quotad_process) + if (!log_write_allowed && current == sdp->sd_quotad_process) + fs_warn(sdp, "The quotad daemon is withdrawing.\n"); + else if (sdp->sd_quotad_process) kthread_stop(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; - if (sdp->sd_logd_process) + + if (!log_write_allowed && current == sdp->sd_logd_process) + fs_warn(sdp, "The logd daemon is withdrawing.\n"); + else if (sdp->sd_logd_process) kthread_stop(sdp->sd_logd_process); sdp->sd_logd_process = NULL; - gfs2_quota_sync(sdp->sd_vfs, 0); - gfs2_statfs_sync(sdp->sd_vfs, 0); - - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | - GFS2_LFC_MAKE_FS_RO); - wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); + if (log_write_allowed) { + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | + GFS2_LFC_MAKE_FS_RO); + wait_event(sdp->sd_reserving_log_wait, + atomic_read(&sdp->sd_reserving_log) == 0); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == + sdp->sd_jdesc->jd_blocks); + } else { + wait_event_timeout(sdp->sd_reserving_log_wait, + atomic_read(&sdp->sd_reserving_log) == 0, + HZ * 5); + } if (gfs2_holder_initialized(&freeze_gh)) gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); + if (!log_write_allowed) + sdp->sd_vfs->s_flags |= SB_RDONLY; + return error; } @@ -677,8 +712,10 @@ restart: gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { - gfs2_glock_dq_uninit(&sdp->sd_journal_gh); - gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + if (gfs2_holder_initialized(&sdp->sd_journal_gh)) + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + if (gfs2_holder_initialized(&sdp->sd_jinode_gh)) + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); gfs2_glock_dq_uninit(&sdp->sd_sc_gh); gfs2_glock_dq_uninit(&sdp->sd_qc_gh); iput(sdp->sd_sc_inode); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b8bf811a1305..51900554ed81 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -26,7 +26,6 @@ extern void gfs2_jindex_free(struct gfs2_sbd *sdp); extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); - extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, struct gfs2_inode **ipp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index a2eae5c578d6..d28c41bd69b0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -435,6 +435,8 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) * never clear the DFL_BLOCK_LOCKS flag, so all our locks would * permanently stop working. */ + if (!sdp->sd_jdesc) + goto out; if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator) goto out; rv = -ENOENT; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 86965e6089c6..155a2249a32b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -11,11 +11,14 @@ #include #include #include +#include #include #include "gfs2.h" #include "incore.h" #include "glock.h" +#include "glops.h" +#include "log.h" #include "lops.h" #include "recovery.h" #include "rgrp.h" @@ -78,6 +81,167 @@ out_unlock: return error; } +static void signal_our_withdraw(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl; + struct inode *inode = sdp->sd_jdesc->jd_inode; + struct gfs2_inode *ip = GFS2_I(inode); + u64 no_formal_ino = ip->i_no_formal_ino; + int ret = 0; + int tries; + + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + return; + + /* Prevent any glock dq until withdraw recovery is complete */ + set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + /* + * Don't tell dlm we're bailing until we have no more buffers in the + * wind. If journal had an IO error, the log code should just purge + * the outstanding buffers rather than submitting new IO. Making the + * file system read-only will flush the journal, etc. + * + * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown + * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write + * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and + * therefore we need to clear SDF_JOURNAL_LIVE manually. + */ + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + if (!sb_rdonly(sdp->sd_vfs)) + ret = gfs2_make_fs_ro(sdp); + + /* + * Drop the glock for our journal so another node can recover it. + */ + if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { + gfs2_glock_dq_wait(&sdp->sd_journal_gh); + gfs2_holder_uninit(&sdp->sd_journal_gh); + } + sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq(&sdp->sd_jinode_gh); + if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { + /* Make sure gfs2_unfreeze works if partially-frozen */ + flush_workqueue(gfs2_freeze_wq); + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + thaw_super(sdp->sd_vfs); + } else { + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); + } + + /* + * holder_uninit to force glock_put, to force dlm to let go + */ + gfs2_holder_uninit(&sdp->sd_jinode_gh); + + /* + * Note: We need to be careful here: + * Our iput of jd_inode will evict it. The evict will dequeue its + * glock, but the glock dq will wait for the withdraw unless we have + * exception code in glock_dq. + */ + iput(inode); + /* + * Wait until the journal inode's glock is freed. This allows try locks + * on other nodes to be successful, otherwise we remain the owner of + * the glock as far as dlm is concerned. + */ + if (gl->gl_ops->go_free) { + set_bit(GLF_FREEING, &gl->gl_flags); + wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); + } + + if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + goto skip_recovery; + } + /* + * Dequeue the "live" glock, but keep a reference so it's never freed. + */ + gfs2_glock_hold(gl); + gfs2_glock_dq_wait(&sdp->sd_live_gh); + /* + * We enqueue the "live" glock in EX so that all other nodes + * get a demote request and act on it. We don't really want the + * lock in EX, so we send a "try" lock with 1CB to produce a callback. + */ + fs_warn(sdp, "Requesting recovery of jid %d.\n", + sdp->sd_lockstruct.ls_jid); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP, + &sdp->sd_live_gh); + msleep(GL_GLOCK_MAX_HOLD); + /* + * This will likely fail in a cluster, but succeed standalone: + */ + ret = gfs2_glock_nq(&sdp->sd_live_gh); + + /* + * If we actually got the "live" lock in EX mode, there are no other + * nodes available to replay our journal. So we try to replay it + * ourselves. We hold the "live" glock to prevent other mounters + * during recovery, then just dequeue it and reacquire it in our + * normal SH mode. Just in case the problem that caused us to + * withdraw prevents us from recovering our journal (e.g. io errors + * and such) we still check if the journal is clean before proceeding + * but we may wait forever until another mounter does the recovery. + */ + if (ret == 0) { + fs_warn(sdp, "No other mounters found. Trying to recover our " + "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); + if (gfs2_recover_journal(sdp->sd_jdesc, 1)) + fs_warn(sdp, "Unable to recover our journal jid %d.\n", + sdp->sd_lockstruct.ls_jid); + gfs2_glock_dq_wait(&sdp->sd_live_gh); + gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT, + &sdp->sd_live_gh); + gfs2_glock_nq(&sdp->sd_live_gh); + } + + gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */ + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + + /* + * At this point our journal is evicted, so we need to get a new inode + * for it. Once done, we need to call gfs2_find_jhead which + * calls gfs2_map_journal_extents to map it for us again. + * + * Note that we don't really want it to look up a FREE block. The + * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup + * which would otherwise fail because it requires grabbing an rgrp + * glock, which would fail with -EIO because we're withdrawing. + */ + inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, + sdp->sd_jdesc->jd_no_addr, no_formal_ino, + GFS2_BLKST_FREE); + if (IS_ERR(inode)) { + fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", + sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); + goto skip_recovery; + } + sdp->sd_jdesc->jd_inode = inode; + + /* + * Now wait until recovery is complete. + */ + for (tries = 0; tries < 10; tries++) { + ret = check_journal_clean(sdp, sdp->sd_jdesc); + if (!ret) + break; + msleep(HZ); + fs_warn(sdp, "Waiting for journal recovery jid %d.\n", + sdp->sd_lockstruct.ls_jid); + } +skip_recovery: + if (!ret) + fs_warn(sdp, "Journal recovery complete for jid %d.\n", + sdp->sd_lockstruct.ls_jid); + else + fs_warn(sdp, "Journal recovery skipped for %d until next " + "mount.\n", sdp->sd_lockstruct.ls_jid); + fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); + sdp->sd_glock_dqs_held = 0; + wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); +} + void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) { struct va_format vaf; @@ -100,13 +264,23 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) const struct lm_lockops *lm = ls->ls_ops; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && - test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) - return 0; + test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)) + return -1; + + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG, + TASK_UNINTERRUPTIBLE); + return -1; + } + + set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); + signal_our_withdraw(sdp); + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) @@ -117,8 +291,11 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) lm->lm_unmount(sdp); } set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); - fs_err(sdp, "withdrawn\n"); + fs_err(sdp, "File system withdrawn\n"); dump_stack(); + clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); + smp_mb__after_atomic(); + wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) -- cgit v1.2.3 From 33dbd1e41a1dd549eb19a29477119d4e29766210 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 22 May 2019 09:21:21 -0500 Subject: gfs2: fix infinite loop when checking ail item count before go_inval Before this patch, the rgrp_go_inval and inode_go_inval functions each checked if there were any items left on the ail count (by way of a count), and if so, did a withdraw. But the withdraw code now uses glocks when changing the file system to read-only status. So we can not have glock functions withdrawing or a hang will likely result: The glocks can't be serviced by the work_func if the work_func is busy doing its own withdraw. This patch removes the checks from the go_inval functions and adds a centralized check in do_xmote to warn about the problem and not withdraw, but flag the error so it's eventually caught when the logd daemon eventually runs. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 17 +++++++++++++++-- fs/gfs2/glops.c | 3 --- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7602d0e2492c..5afaf92057c0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -604,9 +604,22 @@ __acquires(&gl->gl_lockref.lock) spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) glops->go_sync(gl); - if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) + if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { + /* + * The call to go_sync should have cleared out the ail list. + * If there are still items, we have a problem. We ought to + * withdraw, but we can't because the withdraw code also uses + * glocks. Warn about the error, dump the glock, then fall + * through and wait for logd to do the withdraw for us. + */ + if ((atomic_read(&gl->gl_ail_count) != 0) && + (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { + gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_dump_glock(NULL, gl, true); + } glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); - clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); + } gfs2_glock_hold(gl); if (sdp->sd_lockstruct.ls_ops->lm_lock) { diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 7cfacbe35e59..b58924482d9a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -188,7 +188,6 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); - gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); if (rgd) @@ -288,8 +287,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gfs2_glock2inode(gl); - gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); - if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); -- cgit v1.2.3 From 7d9f9249580e05a9af823cdbb7a91f2758d1c63b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 18 Feb 2019 13:04:13 -0700 Subject: gfs2: Add verbose option to check_journal_clean Before this patch, function check_journal_clean would give messages related to journal recovery. That's fine for mount time, but when a node withdraws and forces replay that way, we don't want all those distracting and misleading messages. This patch adds a new parameter to make those messages optional. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/util.c | 23 ++++++++++++++++------- fs/gfs2/util.h | 4 +++- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 70d2abd000d8..1aeb591bfd28 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -696,7 +696,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x); if (sdp->sd_args.ar_spectator) { - error = check_journal_clean(sdp, jd); + error = check_journal_clean(sdp, jd, true); if (error) goto fail_jinode_gh; continue; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 155a2249a32b..20a5860841e2 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -46,7 +46,8 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) * * Returns: 0 if the journal is clean or locked, else an error */ -int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose) { int error; struct gfs2_holder j_gh; @@ -57,23 +58,31 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, &j_gh); if (error) { - fs_err(sdp, "Error locking journal for spectator mount.\n"); + if (verbose) + fs_err(sdp, "Error %d locking journal for spectator " + "mount.\n", error); return -EPERM; } error = gfs2_jdesc_check(jd); if (error) { - fs_err(sdp, "Error checking journal for spectator mount.\n"); + if (verbose) + fs_err(sdp, "Error checking journal for spectator " + "mount.\n"); goto out_unlock; } error = gfs2_find_jhead(jd, &head, false); if (error) { - fs_err(sdp, "Error parsing journal for spectator mount.\n"); + if (verbose) + fs_err(sdp, "Error parsing journal for spectator " + "mount.\n"); goto out_unlock; } if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { error = -EPERM; - fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " - "must not be a spectator.\n", jd->jd_jid); + if (verbose) + fs_err(sdp, "jid=%u: Journal is dirty, so the first " + "mounter must not be a spectator.\n", + jd->jd_jid); } out_unlock: @@ -223,7 +232,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * Now wait until recovery is complete. */ for (tries = 0; tries < 10; tries++) { - ret = check_journal_clean(sdp, sdp->sd_jdesc); + ret = check_journal_clean(sdp, sdp->sd_jdesc, false); if (!ret) break; msleep(HZ); diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index cf613497a20e..97117a766bde 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -136,7 +136,9 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, unsigned int line); -int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); + +extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, + bool verbose); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); -- cgit v1.2.3 From 5e4c7632aae1cce137792647f4fb6f599d1da893 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 21 Feb 2019 14:28:07 -0700 Subject: gfs2: Issue revokes more intelligently Before this patch, function gfs2_write_revokes would call gfs2_ail1_empty, then traverse the sd_ail1_list looking for transactions that had bds which were no longer queued to a glock. And if it found some, it would try to issue revokes for them, up to a predetermined maximum. There were two problems with how it did this. First was the fact that gfs2_ail1_empty moves transactions which have nothing remaining on the ail1 list from the sd_ail1_list to the sd_ail2_list, thus making its traversal of sd_ail1_list miss them completely, and therefore, never issue revokes for them. Second was the fact that there were three traversals (or partial traversals) of the sd_ail1_list, each of which took and then released the sd_ail_lock lock: First inside gfs2_ail1_empty, second to determine if there are any revokes to be issued, and third to actually issue them. All this taking and releasing of the sd_ail_lock meant other processes could modify the lists and the conditions in which we're working. This patch simplies the whole process by adding a new parameter to function gfs2_ail1_empty, max_revokes. For normal calls, this is passed in as 0, meaning we don't want to issue any revokes. For function gfs2_write_revokes, we pass in the maximum number of revokes we can, thus allowing gfs2_ail1_empty to add the revokes where needed. This simplies the code, allows for a single holding of the sd_ail_lock, and allows gfs2_ail1_empty to add revokes for all the necessary bd items without missing any. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/log.c | 74 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 584bb7ce15bf..578c1e0cd415 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -191,11 +191,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem - * @ai: the AIL entry + * @tr: the transaction + * @max_revokes: If nonzero, issue revokes for the bd items for written buffers * */ -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + int *max_revokes) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -220,6 +222,17 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) gfs2_io_error_bh(sdp, bh); gfs2_withdraw_delayed(sdp); } + /* + * If we have space for revokes and the bd is no longer on any + * buf list, we can just add a revoke for it immediately and + * avoid having to put it on the ail2 list, where it would need + * to be revoked later. + */ + if (*max_revokes && list_empty(&bd->bd_list)) { + gfs2_add_revoke(sdp, bd); + (*max_revokes)--; + continue; + } list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } } @@ -227,11 +240,12 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) /** * gfs2_ail1_empty - Try to empty the ail1 lists * @sdp: The superblock + * @max_revokes: If non-zero, add revokes where appropriate * * Tries to empty the ail1 lists, starting with the oldest first */ -static int gfs2_ail1_empty(struct gfs2_sbd *sdp) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) { struct gfs2_trans *tr, *s; int oldest_tr = 1; @@ -239,7 +253,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ail_lock); list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { - gfs2_ail1_empty_one(sdp, tr); + gfs2_ail1_empty_one(sdp, tr, &max_revokes); if (list_empty(&tr->tr_ail1_list) && oldest_tr) list_move(&tr->tr_list, &sdp->sd_ail2_list); else @@ -627,27 +641,24 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) } } +/** + * gfs2_write_revokes - Add as many revokes to the system transaction as we can + * @sdp: The GFS2 superblock + * + * Our usual strategy is to defer writing revokes as much as we can in the hope + * that we'll eventually overwrite the journal, which will make those revokes + * go away. This changes when we flush the log: at that point, there will + * likely be some left-over space in the last revoke block of that transaction. + * We can fill that space with additional revokes for blocks that have already + * been written back. This will basically come at no cost now, and will save + * us from having to keep track of those blocks on the AIL2 list later. + */ void gfs2_write_revokes(struct gfs2_sbd *sdp) { - struct gfs2_trans *tr; - struct gfs2_bufdata *bd, *tmp; - int have_revokes = 0; + /* number of revokes we still have room for */ int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); - gfs2_ail1_empty(sdp); - spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { - list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { - if (list_empty(&bd->bd_list)) { - have_revokes = 1; - goto done; - } - } - } -done: - spin_unlock(&sdp->sd_ail_lock); - if (have_revokes == 0) - return; + gfs2_log_lock(sdp); while (sdp->sd_log_num_revoke > max_revokes) max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); max_revokes -= sdp->sd_log_num_revoke; @@ -658,20 +669,7 @@ done: if (!sdp->sd_log_blks_reserved) atomic_dec(&sdp->sd_log_blks_free); } - gfs2_log_lock(sdp); - spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { - list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { - if (max_revokes == 0) - goto out_of_blocks; - if (!list_empty(&bd->bd_list)) - continue; - gfs2_add_revoke(sdp, bd); - max_revokes--; - } - } -out_of_blocks: - spin_unlock(&sdp->sd_ail_lock); + gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); if (!sdp->sd_log_num_revoke) { @@ -870,7 +868,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) for (;;) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp)) + if (gfs2_ail1_empty(sdp, 0)) break; } if (gfs2_withdrawn(sdp)) @@ -1040,7 +1038,7 @@ int gfs2_logd(void *data) did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { - gfs2_ail1_empty(sdp); + gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_JFLUSH_REQD); did_flush = true; @@ -1049,7 +1047,7 @@ int gfs2_logd(void *data) if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); - gfs2_ail1_empty(sdp); + gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_AIL_FLUSH_REQD); did_flush = true; -- cgit v1.2.3 From f05b86db314df9f31c4c21153338f6a38b1f0de7 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 15 May 2019 09:10:35 -0500 Subject: gfs2: Prepare to withdraw as soon as an IO error occurs in log write Before this patch, function gfs2_end_log_write would detect any IO errors writing to the journal and put out an appropriate message, but it never set a withdrawing condition. Eventually, the log daemon would see the error and determine it was time to withdraw, but in the meantime, other processes could continue running as if nothing bad ever happened. The biggest consequence is that __gfs2_glock_put would BUG() when it saw that there were still unwritten items. This patch sets the WITHDRAWING status as soon as an IO error is detected, and that way, the BUG will be avoided so the file system can be properly withdrawn and unmounted. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 0af2e5ff0d97..7307e5e721d1 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -206,6 +206,9 @@ static void gfs2_end_log_write(struct bio *bio) if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status)) fs_err(sdp, "Error %d writing to journal, jid=%u\n", bio->bi_status, sdp->sd_jdesc->jd_jid); + gfs2_withdraw_delayed(sdp); + /* prevent more writes to the journal */ + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); wake_up(&sdp->sd_logd_waitq); } -- cgit v1.2.3 From d93ae386ef3d1bf4d683f870ad8e838159ec451d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 29 Apr 2019 13:14:58 -0600 Subject: gfs2: Check for log write errors before telling dlm to unlock Before this patch, function do_xmote just assumed all the writes submitted to the journal were finished and successful, and it called the go_unlock function to release the dlm lock. But if they're not, and a revoke failed to make its way to the journal, a journal replay on another node will cause corruption if we let the go_inval function continue and tell dlm to release the glock to another node. This patch adds a couple checks for errors in do_xmote after the calls to go_sync and go_inval. If an error is found, we cannot withdraw yet, because the withdraw itself uses glocks to make the file system read-only. Instead, we flag the error. Later, asserts should cause another node to replay the journal before continuing, thus protecting rgrp and dinode glocks and maintaining the integrity of the metadata. Note that we only need to do this for journaled glocks. System glocks should be able to progress even under withdrawn conditions. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5afaf92057c0..6af1edabef05 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -622,6 +622,32 @@ __acquires(&gl->gl_lockref.lock) } gfs2_glock_hold(gl); + /* + * Check for an error encountered since we called go_sync and go_inval. + * If so, we can't withdraw from the glock code because the withdraw + * code itself uses glocks (see function signal_our_withdraw) to + * change the mount to read-only. Most importantly, we must not call + * dlm to unlock the glock until the journal is in a known good state + * (after journal replay) otherwise other nodes may use the object + * (rgrp or dinode) and then later, journal replay will corrupt the + * file system. The best we can do here is wait for the logd daemon + * to see sd_log_error and withdraw, and in the meantime, requeue the + * work for later. + * + * However, if we're just unlocking the lock (say, for unmount, when + * gfs2_gl_hash_clear calls clear_glock) and recovery is complete + * then it's okay to tell dlm to unlock it. + */ + if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp))) + gfs2_withdraw_delayed(sdp); + if (glock_blocked_by_withdraw(gl)) { + if (target != LM_ST_UNLOCKED || + test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) { + gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + goto out; + } + } + if (sdp->sd_lockstruct.ls_ops->lm_lock) { /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); @@ -630,8 +656,7 @@ __acquires(&gl->gl_lockref.lock) test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); - } - else if (ret) { + } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); } @@ -639,7 +664,7 @@ __acquires(&gl->gl_lockref.lock) finish_xmote(gl, target); gfs2_glock_queue_work(gl, 0); } - +out: spin_lock(&gl->gl_lockref.lock); } -- cgit v1.2.3 From 9ff78289356af640941bbb0dd3f46af2063f0046 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 13:47:02 -0600 Subject: gfs2: Do log_flush in gfs2_ail_empty_gl even if ail list is empty Before this patch, if gfs2_ail_empty_gl saw there was nothing on the ail list, it would return and not flush the log. The problem is that there could still be a revoke for the rgrp sitting on the sd_log_le_revoke list that's been recently taken off the ail list. But that revoke still needs to be written, and the rgrp_go_inval still needs to call log_flush_wait to ensure the revokes are all properly written to the journal before we relinquish control of the glock to another node. If we give the glock to another node before we have this knowledge, the node might crash and its journal replayed, in which case the missing revoke would allow the journal replay to replay the rgrp over top of the rgrp we already gave to another node, thus overwriting its changes and corrupting the file system. This patch makes gfs2_ail_empty_gl still call gfs2_log_flush rather than returning. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 27 ++++++++++++++++++++++++++- fs/gfs2/log.c | 2 +- fs/gfs2/log.h | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index b58924482d9a..bbbcae8d853c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -92,8 +92,32 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) INIT_LIST_HEAD(&tr.tr_databuf); tr.tr_revokes = atomic_read(&gl->gl_ail_count); - if (!tr.tr_revokes) + if (!tr.tr_revokes) { + bool have_revokes; + bool log_in_flight; + + /* + * We have nothing on the ail, but there could be revokes on + * the sdp revoke queue, in which case, we still want to flush + * the log and wait for it to finish. + * + * If the sdp revoke list is empty too, we might still have an + * io outstanding for writing revokes, so we should wait for + * it before returning. + * + * If none of these conditions are true, our revokes are all + * flushed and we can return. + */ + gfs2_log_lock(sdp); + have_revokes = !list_empty(&sdp->sd_log_revokes); + log_in_flight = atomic_read(&sdp->sd_log_in_flight); + gfs2_log_unlock(sdp); + if (have_revokes) + goto flush; + if (log_in_flight) + log_flush_wait(sdp); return; + } /* A shortened, inline version of gfs2_trans_begin() * tr->alloced is not set since the transaction structure is @@ -108,6 +132,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) __gfs2_ail_flush(gl, 0, tr.tr_revokes); gfs2_trans_end(sdp); +flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 578c1e0cd415..c37f81470792 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -537,7 +537,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) } -static void log_flush_wait(struct gfs2_sbd *sdp) +void log_flush_wait(struct gfs2_sbd *sdp) { DEFINE_WAIT(wait); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index c0a65e5a126b..c1cd6ae17659 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -73,6 +73,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); +extern void log_flush_wait(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); -- cgit v1.2.3 From b1676cbb11153b5bf4dd9e6c99869b284fb8160e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 13:53:42 -0600 Subject: gfs2: Withdraw in gfs2_ail1_flush if write_cache_pages fails Before this patch, function gfs2_ail1_start_one would return any errors it received from write_cache_pages (except -EBUSY) but it did not withdraw. Since function gfs2_ail1_flush just checks for the bad return code and loops, the loop might potentially never end. This patch adds some logic to allow it to exit the loop and withdraw properly when errors are received from write_cache_pages. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/log.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index c37f81470792..ed80ef8e5c33 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -96,6 +96,7 @@ __acquires(&sdp->sd_ail_lock) struct address_space *mapping; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; + int ret = 0; list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; @@ -128,14 +129,14 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - generic_writepages(mapping, wbc); + ret = generic_writepages(mapping, wbc); spin_lock(&sdp->sd_ail_lock); - if (wbc->nr_to_write <= 0) + if (ret || wbc->nr_to_write <= 0) break; - return 1; + return -EBUSY; } - return 0; + return ret; } @@ -153,6 +154,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; + int ret = 0; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); @@ -161,12 +163,16 @@ restart: list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, tr) && !gfs2_withdrawn(sdp)) - goto restart; + ret = gfs2_ail1_start_one(sdp, wbc, tr); + if (ret) { + if (ret == -EBUSY) + goto restart; + break; + } } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); - if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) + if (ret) gfs2_withdraw(sdp); trace_gfs2_ail_flush(sdp, wbc, 0); } -- cgit v1.2.3 From 2ca0c2fbf3ed7f9609333a996149d02f70e8a6f3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 13:58:30 -0600 Subject: gfs2: drain the ail2 list after io errors Before this patch, gfs2_logd continually tried to flush its journal log, after the file system is withdrawn. We don't want to write anything to the journal, lest we add corruption. Best course of action is to drain the ail1 into the ail2 list (via gfs2_ail1_empty) then drain the ail2 list with a new function, ail2_drain. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/log.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++-------- fs/gfs2/trans.c | 4 ++++ 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ed80ef8e5c33..010c319caade 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -299,20 +299,17 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp) } /** - * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced - * @sdp: the filesystem - * @ai: the AIL entry - * + * gfs2_ail_empty_tr - empty one of the ail lists for a transaction */ -static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + struct list_head *head) { - struct list_head *head = &tr->tr_ail2_list; struct gfs2_bufdata *bd; while (!list_empty(head)) { - bd = list_entry(head->prev, struct gfs2_bufdata, - bd_ail_st_list); + bd = list_first_entry(head, struct gfs2_bufdata, + bd_ail_st_list); gfs2_assert(sdp, bd->bd_tr == tr); gfs2_remove_from_ail(bd); } @@ -334,7 +331,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) if (!rm) continue; - gfs2_ail2_empty_one(sdp, tr); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); list_del(&tr->tr_list); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); @@ -801,6 +798,40 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) log_pull_tail(sdp, tail); } +/** + * ail_drain - drain the ail lists after a withdraw + * @sdp: Pointer to GFS2 superblock + */ +static void ail_drain(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + + spin_lock(&sdp->sd_ail_lock); + /* + * For transactions on the sd_ail1_list we need to drain both the + * ail1 and ail2 lists. That's because function gfs2_ail1_start_one + * (temporarily) moves items from its tr_ail1 list to tr_ail2 list + * before revokes are sent for that block. Items on the sd_ail2_list + * should have already gotten beyond that point, so no need. + */ + while (!list_empty(&sdp->sd_ail1_list)) { + tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans, + tr_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + kfree(tr); + } + while (!list_empty(&sdp->sd_ail2_list)) { + tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans, + tr_list); + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + kfree(tr); + } + spin_unlock(&sdp->sd_ail_lock); +} + /** * gfs2_log_flush - flush incore transaction(s) * @sdp: the filesystem @@ -811,11 +842,18 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { - struct gfs2_trans *tr; + struct gfs2_trans *tr = NULL; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); down_write(&sdp->sd_log_flush_lock); + /* + * Do this check while holding the log_flush_lock to prevent new + * buffers from being added to the ail via gfs2_pin() + */ + if (gfs2_withdrawn(sdp)) + goto out; + /* Log might have been flushed while we waited for the flush lock */ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { up_write(&sdp->sd_log_flush_lock); @@ -843,8 +881,14 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke); gfs2_ordered_write(sdp); + if (gfs2_withdrawn(sdp)) + goto out; lops_before_commit(sdp, tr); + if (gfs2_withdrawn(sdp)) + goto out; gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); + if (gfs2_withdrawn(sdp)) + goto out; if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); @@ -854,6 +898,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); } + if (gfs2_withdrawn(sdp)) + goto out; lops_after_commit(sdp, tr); gfs2_log_lock(sdp); @@ -892,6 +938,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) } out: + if (gfs2_withdrawn(sdp)) { + ail_drain(sdp); /* frees all transactions */ + tr = NULL; + } + trace_gfs2_log_flush(sdp, 0, flags); up_write(&sdp->sd_log_flush_lock); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index a685637a5b55..ffe840505082 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -228,6 +228,10 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) fs_info(sdp, "GFS2:adding buf while frozen\n"); gfs2_assert_withdraw(sdp, 0); } + if (unlikely(gfs2_withdrawn(sdp))) { + fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n", + (unsigned long long)bd->bd_bh->b_blocknr); + } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); -- cgit v1.2.3 From df5db5f9ee112e76b5202fbc331f990a0fc316d6 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 14:08:45 -0600 Subject: gfs2: Don't demote a glock until its revokes are written Before this patch, run_queue would demote glocks based on whether there are any more holders. But if the glock has pending revokes that haven't been written to the media, giving up the glock might end in file system corruption if the revokes never get written due to io errors, node crashes and fences, etc. In that case, another node will replay the metadata blocks associated with the glock, but because the revoke was never written, it could replay that block even though the glock had since been granted to another node who might have made changes. This patch changes the logic in run_queue so that it never demotes a glock until its count of pending revokes reaches zero. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6af1edabef05..73cb5bcc37a7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -710,6 +710,9 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; + smp_mb(); + if (atomic_read(&gl->gl_revokes) != 0) + goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; -- cgit v1.2.3 From 1c634f94c3da39115270d35b3075af970810a927 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Nov 2019 14:09:28 -0600 Subject: gfs2: Do proper error checking for go_sync family of glops functions Before this patch, function do_xmote would try to sync out the glock dirty data by calling the appropriate glops function XXX_go_sync() but it did not check for a good return code. If the sync was not possible due to an io error or whatever, do_xmote would continue on and call go_inval and release the glock to other cluster nodes. When those nodes go to replay the journal, they may already be holding glocks for the journal records that should have been synced, but were not due to the ignored error. This patch introduces proper error code checking to the go_sync family of glops functions. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 16 ++++++++++++++-- fs/gfs2/glops.c | 30 +++++++++++++++++++----------- fs/gfs2/incore.h | 2 +- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 73cb5bcc37a7..0bfa58e5a64e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -602,8 +602,20 @@ __acquires(&gl->gl_lockref.lock) (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); - if (glops->go_sync) - glops->go_sync(gl); + if (glops->go_sync) { + ret = glops->go_sync(gl); + /* If we had a problem syncing (due to io errors or whatever, + * we should not invalidate the metadata or tell dlm to + * release the glock to other nodes. + */ + if (ret) { + if (cmpxchg(&sdp->sd_log_error, 0, ret)) { + fs_err(sdp, "Error %d syncing glock \n", ret); + gfs2_dump_glock(NULL, gl, true); + } + return; + } + } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { /* * The call to go_sync should have cleared out the ail list. diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bbbcae8d853c..9e9c7a4b8c66 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -82,10 +82,11 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, } -static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +static int gfs2_ail_empty_gl(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; + int ret; memset(&tr, 0, sizeof(tr)); INIT_LIST_HEAD(&tr.tr_buf); @@ -116,7 +117,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) goto flush; if (log_in_flight) log_flush_wait(sdp); - return; + return 0; } /* A shortened, inline version of gfs2_trans_begin() @@ -124,8 +125,9 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); tr.tr_ip = _RET_IP_; - if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) - return; + ret = gfs2_log_reserve(sdp, tr.tr_reserved); + if (ret < 0) + return ret; WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; @@ -135,6 +137,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); + return 0; } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) @@ -168,7 +171,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) * return to caller to demote/unlock the glock until I/O is complete. */ -static void rgrp_go_sync(struct gfs2_glock *gl) +static int rgrp_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct address_space *mapping = &sdp->sd_aspace; @@ -176,21 +179,24 @@ static void rgrp_go_sync(struct gfs2_glock *gl) int error; if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) - return; + return 0; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_RGRP_GO_SYNC); filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + WARN_ON_ONCE(error); mapping_set_error(mapping, error); - gfs2_ail_empty_gl(gl); + if (!error) + error = gfs2_ail_empty_gl(gl); spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); spin_unlock(&gl->gl_lockref.lock); + return error; } /** @@ -257,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip) * */ -static void inode_go_sync(struct gfs2_glock *gl) +static int inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gfs2_glock2inode(gl); int isreg = ip && S_ISREG(ip->i_inode.i_mode); struct address_space *metamapping = gfs2_glock2aspace(gl); - int error; + int error = 0; if (isreg) { if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) @@ -295,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl) out: gfs2_clear_glop_pending(ip); + return error; } /** @@ -515,7 +522,7 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, * */ -static void freeze_go_sync(struct gfs2_glock *gl) +static int freeze_go_sync(struct gfs2_glock *gl) { int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -529,7 +536,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) error); if (gfs2_withdrawn(sdp)) { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - return; + return 0; } gfs2_assert_withdraw(sdp, 0); } @@ -537,6 +544,7 @@ static void freeze_go_sync(struct gfs2_glock *gl) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); } + return 0; } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8cd564bcf5e6..04549a8cae7e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -234,7 +234,7 @@ struct lm_lockname { struct gfs2_glock_operations { - void (*go_sync) (struct gfs2_glock *gl); + int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); -- cgit v1.2.3 From ca399c96e96e3f372f901a698a6fd17796b8ed32 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 8 Jan 2020 11:37:30 -0600 Subject: gfs2: flesh out delayed withdraw for gfs2_log_flush Function gfs2_log_flush() had a few places where it tried to withdraw from the file system when errors were encountered. The problem is, it should delay those withdraws until the log flush lock is no longer held. This patch creates a new function just for delayed withdraws for situations like this. If errors=panic was specified on mount, we still want to do it the old fashioned way because the panic it does not help to delay in that situation. Signed-off-by: Bob Peterson Reviewed-by: Andreas Gruenbacher --- fs/gfs2/log.c | 12 ++++++++---- fs/gfs2/util.c | 27 +++++++++++++++++++++------ fs/gfs2/util.h | 14 ++++++++++++-- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 010c319caade..67465a34954e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -872,13 +872,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) INIT_LIST_HEAD(&tr->tr_ail2_list); tr->tr_first = sdp->sd_log_flush_head; if (unlikely (state == SFS_FROZEN)) - gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new); + if (gfs2_assert_withdraw_delayed(sdp, + !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) + goto out; } if (unlikely(state == SFS_FROZEN)) - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - gfs2_assert_withdraw(sdp, - sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke); + if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) + goto out; + if (gfs2_assert_withdraw_delayed(sdp, + sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke)) + goto out; gfs2_ordered_write(sdp); if (gfs2_withdrawn(sdp)) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 20a5860841e2..9b64d40ab379 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -318,13 +318,28 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) */ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line) + const char *function, char *file, unsigned int line, + bool delayed) { - gfs2_lm(sdp, - "fatal: assertion \"%s\" failed\n" - " function = %s, file = %s, line = %u\n", - assertion, function, file, line); - gfs2_withdraw(sdp); + if (gfs2_withdrawn(sdp)) + return; + + fs_err(sdp, + "fatal: assertion \"%s\" failed\n" + " function = %s, file = %s, line = %u\n", + assertion, function, file, line); + + /* + * If errors=panic was specified on mount, it won't help to delay the + * withdraw. + */ + if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) + delayed = false; + + if (delayed) + gfs2_withdraw_delayed(sdp); + else + gfs2_withdraw(sdp); dump_stack(); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 97117a766bde..a3542560da6f 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -37,14 +37,24 @@ do { \ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line); + const char *function, char *file, unsigned int line, + bool delayed); #define gfs2_assert_withdraw(sdp, assertion) \ ({ \ bool _bool = (assertion); \ if (unlikely(!_bool)) \ gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__); \ + __func__, __FILE__, __LINE__, false); \ + !_bool; \ + }) + +#define gfs2_assert_withdraw_delayed(sdp, assertion) \ + ({ \ + bool _bool = (assertion); \ + if (unlikely(!_bool)) \ + gfs2_assert_withdraw_i((sdp), #assertion, \ + __func__, __FILE__, __LINE__, true); \ !_bool; \ }) -- cgit v1.2.3 From 019dd669bde14bc0748bc43af2f96e2c5e37d3f8 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 17 Feb 2020 14:14:13 -0600 Subject: gfs2: don't allow releasepage to free bd still used for revokes Before this patch, function gfs2_releasepage would free any bd elements that had been used for the page being released. However, those bd elements may still be queued to the sd_log_revokes list, in which case we cannot free them until the revoke has been issued. This patch adds additional checks for bds that are still being used for revokes. Signed-off-by: Bob Peterson --- fs/gfs2/aops.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ba83b49ce18c..786c1ce8f030 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -805,11 +805,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); - if (!list_empty(&bd->bd_list)) - list_del_init(&bd->bd_list); bd->bd_bh = NULL; bh->b_private = NULL; - kmem_cache_free(gfs2_bufdata_cachep, bd); + /* + * The bd may still be queued as a revoke, in which + * case we must not dequeue nor free it. + */ + if (!bd->bd_blkno && !list_empty(&bd->bd_list)) + list_del_init(&bd->bd_list); + if (list_empty(&bd->bd_list)) + kmem_cache_free(gfs2_bufdata_cachep, bd); } bh = bh->b_this_page; -- cgit v1.2.3 From c9ebc4b737999ab1f3264c42431f7be80ac2efbf Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 17 Feb 2020 14:15:05 -0600 Subject: gfs2: allow journal replay to hold sd_log_flush_lock Before this patch, journal replays could stomp on log flushes and each other because both log flushes and journal replays used the same sd_log_bio. Function gfs2_log_flush prevents other log flushes from interfering by taking the sd_log_flush_lock rwsem during the flush. However, it does not protect against journal replays. This patch allows the journal replay to take the same sd_log_flush_lock rwsem so use of the sd_log_bio is not stomped. Signed-off-by: Bob Peterson --- fs/gfs2/recovery.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 8cc26bef4e64..21fc44b31863 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -398,6 +398,10 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n", jd->jd_jid, head.lh_tail, head.lh_blkno); + /* We take the sd_log_flush_lock here primarily to prevent log + * flushes and simultaneous journal replays from stomping on + * each other wrt sd_log_bio. */ + down_write(&sdp->sd_log_flush_lock); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); error = foreach_descriptor(jd, head.lh_tail, @@ -408,6 +412,7 @@ void gfs2_recover_func(struct work_struct *work) } clean_journal(jd, &head); + up_write(&sdp->sd_log_flush_lock); gfs2_glock_dq_uninit(&thaw_gh); t_rep = ktime_get(); -- cgit v1.2.3 From cc44457f16296809e40aae31415cd081a8352433 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 3 Jan 2020 08:48:43 -0600 Subject: gfs2: leaf_dealloc needs to allocate one more revoke Function leaf_dealloc was not allocating enough journal space for revokes. Before, it allocated 'l_blocks' revokes, but it needs one more for the revoke of the dinode that is modified. This patch adds the needed revoke entry to function leaf_dealloc. Signed-off-by: Bob Peterson --- fs/gfs2/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c8b62577e2f2..c3f7732415be 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -2028,7 +2028,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, error = gfs2_trans_begin(sdp, rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + - RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); + RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE + + l_blocks); if (error) goto out_rg_gunlock; -- cgit v1.2.3 From 490031281d5a33fbdab59e98e165c2cd30fc841b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 6 Mar 2020 10:15:03 -0600 Subject: gfs2: Additional information when gfs2_ail1_flush withdraws Before this patch, if gfs2_ail1_flush gets an error from function gfs2_ail1_start_one (which comes indirectly from generic_writepages) the file system is withdrawn, but without any explanation why. This patch adds an error message if gfs2_ail1_flush gets an error from gfs2_ail1_start_one. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 67465a34954e..87f3e892be3e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -172,8 +172,11 @@ restart: } spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); - if (ret) + if (ret) { + gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) " + "returned: %d\n", ret); gfs2_withdraw(sdp); + } trace_gfs2_ail_flush(sdp, wbc, 0); } -- cgit v1.2.3 From 40e7e86ef16550c7371939c7025041b7740f252e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 24 Jan 2020 14:14:46 +0100 Subject: gfs2: Clean up inode initialization and teardown When allocating a new inode, mark the iopen glock holder as uninitialized to make sure gfs2_evict_inode won't fail after an incomplete create or lookup. In gfs2_evict_inode, allow the inode glock to be NULL and remove the duplicate iopen glock teardown code. In gfs2_inode_lookup, don't tear down things that gfs2_evict_inode will already tear down. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/inode.c | 18 ++++++------------ fs/gfs2/super.c | 28 +++++++++++++--------------- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2716d56ed0a0..b5d04f3a247e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -144,7 +144,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (unlikely(error)) - goto fail_put; + goto fail; if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) { /* @@ -155,13 +155,13 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); if (error) - goto fail_put; + goto fail; if (blktype != GFS2_BLKST_FREE) { error = gfs2_check_blk_type(sdp, no_addr, blktype); if (error) - goto fail_put; + goto fail; } } @@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, set_bit(GIF_INVALID, &ip->i_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) - goto fail_put; + goto fail; glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; @@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, /* Inode glock must be locked already */ error = gfs2_inode_refresh(GFS2_I(inode)); if (error) - goto fail_refresh; + goto fail; } else { ip->i_no_formal_ino = no_formal_ino; inode->i_mode = DT2IF(type); @@ -197,17 +197,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, gfs2_glock_dq_uninit(&i_gh); return inode; -fail_refresh: - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); - gfs2_glock_dq_uninit(&ip->i_iopen_gh); -fail_put: +fail: if (io_gl) gfs2_glock_put(io_gl); - glock_clear_object(ip->i_gl, ip); if (gfs2_holder_initialized(&i_gh)) gfs2_glock_dq_uninit(&i_gh); -fail: iget_failed(inode); return ERR_PTR(error); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 693c6d13473c..d664b0175946 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1393,14 +1393,6 @@ out_unlock: if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (gfs2_holder_initialized(&ip->i_iopen_gh)) { - glock_clear_object(ip->i_iopen_gh.gh_gl, ip); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); - } - gfs2_holder_uninit(&ip->i_iopen_gh); - } if (gfs2_holder_initialized(&gh)) { glock_clear_object(ip->i_gl, ip); gfs2_glock_dq_uninit(&gh); @@ -1413,18 +1405,23 @@ out: gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); - glock_clear_object(ip->i_gl, ip); - wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); - gfs2_glock_add_to_lru(ip->i_gl); - gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + if (ip->i_gl) { + glock_clear_object(ip->i_gl, ip); + wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); + gfs2_glock_add_to_lru(ip->i_gl); + gfs2_glock_put_eventually(ip->i_gl); + ip->i_gl = NULL; + } if (gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq(&ip->i_iopen_gh); + } gfs2_glock_hold(gl); - gfs2_glock_dq_uninit(&ip->i_iopen_gh); + gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } } @@ -1438,6 +1435,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) return NULL; ip->i_flags = 0; ip->i_gl = NULL; + gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); memset(&ip->i_res, 0, sizeof(ip->i_res)); RB_CLEAR_NODE(&ip->i_res.rs_node); ip->i_rahead = 0; -- cgit v1.2.3 From 969183bc68bc27d637d6d29e81d71cf854d0ca61 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Feb 2020 19:22:45 +0100 Subject: gfs2: Switch to list_{first,last}_entry Replace open-coded versions of list_first_entry and list_last_entry with those functions. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/bmap.c | 4 ++-- fs/gfs2/glock.c | 10 +++++----- fs/gfs2/log.c | 6 +++--- fs/gfs2/lops.c | 6 +++--- fs/gfs2/quota.c | 6 +++--- fs/gfs2/recovery.c | 2 +- fs/gfs2/super.c | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 08f6fbb3655e..2fe4457e1d01 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2223,7 +2223,7 @@ void gfs2_free_journal_extents(struct gfs2_jdesc *jd) struct gfs2_journal_extent *jext; while(!list_empty(&jd->extent_list)) { - jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); + jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list); list_del(&jext->list); kfree(jext); } @@ -2244,7 +2244,7 @@ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 b struct gfs2_journal_extent *jext; if (!list_empty(&jd->extent_list)) { - jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); + jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list); if ((jext->dblock + jext->blocks) == dblock) { jext->blocks += blocks; return 0; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0bfa58e5a64e..29f9b6684b74 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -308,7 +308,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { - const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list); + const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); if ((gh->gh_state == LM_ST_EXCLUSIVE || gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) return 0; @@ -690,7 +690,7 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) struct gfs2_holder *gh; if (!list_empty(&gl->gl_holders)) { - gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); if (test_bit(HIF_HOLDER, &gh->gh_iflags)) return gh; } @@ -1240,7 +1240,7 @@ fail: } list_add_tail(&gh->gh_list, insert_pt); do_cancel: - gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); + gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list); if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) @@ -1642,7 +1642,7 @@ __acquires(&lru_lock) list_sort(NULL, list, glock_cmp); while(!list_empty(list)) { - gl = list_entry(list->next, struct gfs2_glock, gl_lru); + gl = list_first_entry(list, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: @@ -1683,7 +1683,7 @@ static long gfs2_scan_glock_lru(int nr) spin_lock(&lru_lock); while ((nr-- >= 0) && !list_empty(&lru_list)) { - gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); + gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru); /* Test for being demotable */ if (!test_bit(GLF_LOCK, &gl->gl_flags)) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 87f3e892be3e..8729f5f3a98c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -518,7 +518,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) if (list_empty(&sdp->sd_ail1_list)) { tail = sdp->sd_log_head; } else { - tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans, + tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans, tr_list); tail = tr->tr_first; } @@ -580,7 +580,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ordered_lock); list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp); while (!list_empty(&sdp->sd_log_ordered)) { - ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered); + ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); if (ip->i_inode.i_mapping->nrpages == 0) { test_and_clear_bit(GIF_ORDERED, &ip->i_flags); list_del(&ip->i_ordered); @@ -601,7 +601,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp) spin_lock(&sdp->sd_ordered_lock); while (!list_empty(&sdp->sd_log_ordered)) { - ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered); + ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered); list_del(&ip->i_ordered); WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags)); if (ip->i_inode.i_mapping->nrpages == 0) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 7307e5e721d1..5ea96757afc4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -734,7 +734,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) head = &tr->tr_buf; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gfs2_unpin(sdp, bd->bd_bh, tr); } @@ -904,7 +904,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct gfs2_glock *gl; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gl = bd->bd_gl; gfs2_glock_remove_revoke(gl); @@ -1083,7 +1083,7 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) head = &tr->tr_databuf; while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gfs2_unpin(sdp, bd->bd_bh, tr); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 43ffe5997098..8290f60f9877 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -115,7 +115,7 @@ static void gfs2_qd_dispose(struct list_head *list) struct gfs2_sbd *sdp; while (!list_empty(list)) { - qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); + qd = list_first_entry(list, struct gfs2_quota_data, qd_lru); sdp = qd->qd_gl->gl_name.ln_sbd; list_del(&qd->qd_lru); @@ -1441,7 +1441,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) spin_lock(&qd_lock); while (!list_empty(head)) { - qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); + qd = list_last_entry(head, struct gfs2_quota_data, qd_list); list_del(&qd->qd_list); @@ -1504,7 +1504,7 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp) ip = NULL; spin_lock(&sdp->sd_trunc_lock); if (!list_empty(&sdp->sd_trunc_list)) { - ip = list_entry(sdp->sd_trunc_list.next, + ip = list_first_entry(&sdp->sd_trunc_list, struct gfs2_inode, i_trunc_list); list_del_init(&ip->i_trunc_list); } diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 21fc44b31863..34dfdb211439 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -111,7 +111,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd) struct gfs2_revoke_replay *rr; while (!list_empty(head)) { - rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list); + rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list); list_del(&rr->rr_list); kfree(rr); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d664b0175946..a6bf8f1e083e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -63,7 +63,7 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) sdp->sd_jdesc = NULL; while (!list_empty(&list)) { - jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + jd = list_first_entry(&list, struct gfs2_jdesc, jd_list); gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); @@ -452,7 +452,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) out: while (!list_empty(&list)) { - lfcc = list_entry(list.next, struct lfcc, list); + lfcc = list_first_entry(&list, struct lfcc, list); list_del(&lfcc->list); gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); -- cgit v1.2.3 From d580712a37272182cb63002878f3bb7bcebbb8bd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 6 Mar 2020 10:18:44 -0600 Subject: gfs2: eliminate gfs2_rsqa_alloc in favor of gfs2_qa_alloc Before this patch, multiple callers called gfs2_rsqa_alloc to force the existence of a reservations structure and a quota data structure if needed. However, now the reservations are handled separately, so the quota data is only the quota data. So we eliminate the one in favor of just calling gfs2_qa_alloc directly. Signed-off-by: Bob Peterson --- fs/gfs2/acl.c | 3 ++- fs/gfs2/bmap.c | 2 +- fs/gfs2/file.c | 8 ++++---- fs/gfs2/inode.c | 12 ++++++------ fs/gfs2/quota.c | 6 +++--- fs/gfs2/rgrp.c | 10 ---------- fs/gfs2/rgrp.h | 1 - fs/gfs2/xattr.c | 2 +- 8 files changed, 17 insertions(+), 27 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 09e6be8aa036..cb09b85c5b10 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -21,6 +21,7 @@ #include "glock.h" #include "inode.h" #include "meta_io.h" +#include "quota.h" #include "rgrp.h" #include "trans.h" #include "util.h" @@ -116,7 +117,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) return -E2BIG; - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) return ret; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 2fe4457e1d01..4b9dbab50faf 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2183,7 +2183,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) goto out; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index cb26be6f4351..54b0708e6d35 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -458,7 +458,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) goto out; @@ -849,7 +849,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct gfs2_inode *ip = GFS2_I(inode); ssize_t ret; - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) return ret; @@ -1149,7 +1149,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (mode & FALLOC_FL_PUNCH_HOLE) { ret = __gfs2_punch_hole(file, offset, len); } else { - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) goto out_putw; @@ -1176,7 +1176,7 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, int error; struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) return (ssize_t)error; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b5d04f3a247e..710f1c644f87 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -588,7 +588,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; - error = gfs2_rsqa_alloc(dip); + error = gfs2_qa_alloc(dip); if (error) return error; @@ -641,7 +641,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_gunlock; ip = GFS2_I(inode); - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) goto fail_free_acls; @@ -899,7 +899,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; - error = gfs2_rsqa_alloc(dip); + error = gfs2_qa_alloc(dip); if (error) return error; @@ -1362,7 +1362,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; - error = gfs2_rsqa_alloc(ndip); + error = gfs2_qa_alloc(ndip); if (error) return error; @@ -1874,7 +1874,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) goto out; @@ -1935,7 +1935,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) return error; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8290f60f9877..cbe45e8eb2e0 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -567,7 +567,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) return 0; if (ip->i_qadata == NULL) { - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) return error; } @@ -876,7 +876,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) return error; @@ -1677,7 +1677,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (error) return error; - error = gfs2_rsqa_alloc(ip); + error = gfs2_qa_alloc(ip); if (error) goto out_put; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 2ee2f7d48bc1..3e3696da5bcb 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -590,16 +590,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } -/** - * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode - * plus a quota allocations data structure, if necessary - * @ip: the inode for this reservation - */ -int gfs2_rsqa_alloc(struct gfs2_inode *ip) -{ - return gfs2_qa_alloc(ip); -} - static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, const char *fs_id_buf) { diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index a584f3096418..92cebb785996 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -44,7 +44,6 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); -extern int gfs2_rsqa_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index bbe593d16bea..c4fbb96e001f 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1222,7 +1222,7 @@ static int gfs2_xattr_set(const struct xattr_handler *handler, struct gfs2_holder gh; int ret; - ret = gfs2_rsqa_alloc(ip); + ret = gfs2_qa_alloc(ip); if (ret) return ret; -- cgit v1.2.3 From 2fba46a04c383f91e7fe837d43bf1ab33ce32b6a Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 27 Feb 2020 12:47:53 -0600 Subject: gfs2: Change inode qa_data to allow multiple users Before this patch, multiple users called gfs2_qa_alloc which allocated a qadata structure to the inode, if quotas are turned on. Later, in file close or evict, the structure was deleted with gfs2_qa_delete. But there can be several competing processes who need access to the structure. There were races between file close (release) and the others. Thus, a release could delete the structure out from under a process that relied upon its existence. For example, chown. This patch changes the management of the qadata structures to be a get/put scheme. Function gfs2_qa_alloc has been changed to gfs2_qa_get and if the structure is allocated, the count essentially starts out at 1. Function gfs2_qa_delete has been renamed to gfs2_qa_put, and the last guy to decrement the count to 0 frees the memory. Signed-off-by: Bob Peterson --- fs/gfs2/acl.c | 6 ++++-- fs/gfs2/bmap.c | 2 +- fs/gfs2/file.c | 35 ++++++++++++++++++++++--------- fs/gfs2/incore.h | 1 + fs/gfs2/inode.c | 32 ++++++++++++++++------------ fs/gfs2/quota.c | 63 ++++++++++++++++++++++++++++++++++---------------------- fs/gfs2/quota.h | 4 ++-- fs/gfs2/rgrp.c | 2 +- fs/gfs2/super.c | 2 ++ fs/gfs2/xattr.c | 12 +++++++---- 10 files changed, 101 insertions(+), 58 deletions(-) diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index cb09b85c5b10..2e939f5fe751 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -117,14 +117,14 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) return -E2BIG; - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) return ret; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) - return ret; + goto out; need_unlock = true; } @@ -144,5 +144,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) unlock: if (need_unlock) gfs2_glock_dq_uninit(&gh); +out: + gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4b9dbab50faf..d510a453dfa8 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2183,7 +2183,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) goto out; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 54b0708e6d35..f18876cdfb0f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -458,7 +458,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) goto out; @@ -553,6 +553,7 @@ out_quota_unlock: out_unlock: gfs2_glock_dq(&gh); out_uninit: + gfs2_qa_put(ip); gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); @@ -635,7 +636,17 @@ int gfs2_open_common(struct inode *inode, struct file *file) gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; + if (file->f_mode & FMODE_WRITE) { + ret = gfs2_qa_get(GFS2_I(inode)); + if (ret) + goto fail; + } return 0; + +fail: + kfree(file->private_data); + file->private_data = NULL; + return ret; } /** @@ -690,10 +701,8 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (!(file->f_mode & FMODE_WRITE)) - return 0; - - gfs2_rsqa_delete(ip, &inode->i_writecount); + if (file->f_mode & FMODE_WRITE) + gfs2_rsqa_delete(ip, &inode->i_writecount); return 0; } @@ -849,7 +858,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct gfs2_inode *ip = GFS2_I(inode); ssize_t ret; - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) return ret; @@ -860,7 +869,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) - return ret; + goto out; gfs2_glock_dq_uninit(&gh); } @@ -918,6 +927,8 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) out_unlock: inode_unlock(inode); +out: + gfs2_qa_put(ip); return ret; } @@ -1149,7 +1160,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (mode & FALLOC_FL_PUNCH_HOLE) { ret = __gfs2_punch_hole(file, offset, len); } else { - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) goto out_putw; @@ -1157,6 +1168,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (ret) gfs2_rs_deltree(&ip->i_res); + gfs2_qa_put(ip); } out_putw: @@ -1175,14 +1187,17 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, { int error; struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); + ssize_t ret; - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) return (ssize_t)error; gfs2_size_hint(out, *ppos, len); - return iter_file_splice_write(pipe, out, ppos, len, flags); + ret = iter_file_splice_write(pipe, out, ppos, len, flags); + gfs2_qa_put(ip); + return ret; } #ifdef CONFIG_GFS2_FS_LOCKING_DLM diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 04549a8cae7e..84a824293a78 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -295,6 +295,7 @@ struct gfs2_qadata { /* quota allocation data */ struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS]; struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS]; unsigned int qa_qd_num; + int qa_ref; }; /* Resource group multi-block reservation, in order of appearance: diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 710f1c644f87..d1a24753c55f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -588,13 +588,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; - error = gfs2_qa_alloc(dip); + error = gfs2_qa_get(dip); if (error) return error; error = gfs2_rindex_update(sdp); if (error) - return error; + goto fail; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) @@ -641,7 +641,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_gunlock; ip = GFS2_I(inode); - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) goto fail_free_acls; @@ -776,6 +776,7 @@ fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); fail_free_inode: + gfs2_qa_put(ip); if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); @@ -798,6 +799,7 @@ fail_gunlock: if (gfs2_holder_initialized(ghs + 1)) gfs2_glock_dq_uninit(ghs + 1); fail: + gfs2_qa_put(dip); return error; } @@ -899,7 +901,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; - error = gfs2_qa_alloc(dip); + error = gfs2_qa_get(dip); if (error) return error; @@ -1002,6 +1004,7 @@ out_gunlock: out_child: gfs2_glock_dq(ghs); out_parent: + gfs2_qa_put(ip); gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); return error; @@ -1362,7 +1365,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; - error = gfs2_qa_alloc(ndip); + error = gfs2_qa_get(ndip); if (error) return error; @@ -1562,6 +1565,7 @@ out_gunlock_r: if (gfs2_holder_initialized(&r_gh)) gfs2_glock_dq_uninit(&r_gh); out: + gfs2_qa_put(ndip); return error; } @@ -1873,10 +1877,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) ouid = nuid = NO_UID_QUOTA_CHANGE; if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) - goto out; + return error; error = gfs2_rindex_update(sdp); if (error) @@ -1914,6 +1917,7 @@ out_end_trans: out_gunlock_q: gfs2_quota_unlock(ip); out: + gfs2_qa_put(ip); return error; } @@ -1935,21 +1939,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) return error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) - return error; + goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; + goto error; error = setattr_prepare(dentry, attr); if (error) - goto out; + goto error; if (attr->ia_valid & ATTR_SIZE) error = gfs2_setattr_size(inode, attr->ia_size); @@ -1961,10 +1965,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) error = posix_acl_chmod(inode, inode->i_mode); } -out: +error: if (!error) mark_inode_dirty(inode); gfs2_glock_dq_uninit(&i_gh); +out: + gfs2_qa_put(ip); return error; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cbe45e8eb2e0..cc0c4b5800be 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -525,11 +525,11 @@ static void qdsb_put(struct gfs2_quota_data *qd) } /** - * gfs2_qa_alloc - make sure we have a quota allocations data structure, - * if necessary + * gfs2_qa_get - make sure we have a quota allocations data structure, + * if necessary * @ip: the inode for this reservation */ -int gfs2_qa_alloc(struct gfs2_inode *ip) +int gfs2_qa_get(struct gfs2_inode *ip) { int error = 0; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -540,17 +540,21 @@ int gfs2_qa_alloc(struct gfs2_inode *ip) down_write(&ip->i_rw_mutex); if (ip->i_qadata == NULL) { ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); - if (!ip->i_qadata) + if (!ip->i_qadata) { error = -ENOMEM; + goto out; + } } + ip->i_qadata->qa_ref++; +out: up_write(&ip->i_rw_mutex); return error; } -void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_qa_put(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); - if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { + if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } @@ -566,27 +570,27 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - if (ip->i_qadata == NULL) { - error = gfs2_qa_alloc(ip); - if (error) - return error; - } + error = gfs2_qa_get(ip); + if (error) + return error; qd = ip->i_qadata->qa_qd; if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) || - gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) - return -EIO; + gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) { + error = -EIO; + goto out; + } error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; @@ -594,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) !uid_eq(uid, ip->i_inode.i_uid)) { error = qdsb_get(sdp, make_kqid_uid(uid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; } @@ -603,14 +607,15 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) !gid_eq(gid, ip->i_inode.i_gid)) { error = qdsb_get(sdp, make_kqid_gid(gid), qd); if (error) - goto out; + goto out_unhold; ip->i_qadata->qa_qd_num++; qd++; } -out: +out_unhold: if (error) gfs2_quota_unhold(ip); +out: return error; } @@ -621,6 +626,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip) if (ip->i_qadata == NULL) return; + gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { @@ -628,6 +634,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip) ip->i_qadata->qa_qd[x] = NULL; } ip->i_qadata->qa_qd_num = 0; + gfs2_qa_put(ip); } static int sort_qd(const void *a, const void *b) @@ -876,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) return error; @@ -884,8 +891,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) &data_blocks, &ind_blocks); ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); - if (!ghs) - return -ENOMEM; + if (!ghs) { + error = -ENOMEM; + goto out; + } sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); inode_lock(&ip->i_inode); @@ -893,12 +902,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); if (error) - goto out; + goto out_dq; } error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) - goto out; + goto out_dq; for (x = 0; x < num_qd; x++) { offset = qd2offset(qda[x]); @@ -950,13 +959,15 @@ out_ipres: gfs2_inplace_release(ip); out_alloc: gfs2_glock_dq_uninit(&i_gh); -out: +out_dq: while (qx--) gfs2_glock_dq_uninit(&ghs[qx]); inode_unlock(&ip->i_inode); kfree(ghs); gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC); +out: + gfs2_qa_put(ip); return error; } @@ -1259,6 +1270,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; + BUG_ON(ip->i_qadata->qa_ref <= 0); for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; @@ -1677,7 +1689,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (error) return error; - error = gfs2_qa_alloc(ip); + error = gfs2_qa_get(ip); if (error) goto out_put; @@ -1746,6 +1758,7 @@ out_i: out_q: gfs2_glock_dq_uninit(&q_gh); out_unlockput: + gfs2_qa_put(ip); inode_unlock(&ip->i_inode); out_put: qd_put(qd); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 765627d9a91e..7f9ca8ef40fc 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -15,8 +15,8 @@ struct gfs2_sbd; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_qa_alloc(struct gfs2_inode *ip); -extern void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern int gfs2_qa_get(struct gfs2_inode *ip); +extern void gfs2_qa_put(struct gfs2_inode *ip); extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unhold(struct gfs2_inode *ip); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3e3696da5bcb..04e3e13a230c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -673,7 +673,7 @@ void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) if ((wcount == NULL) || (atomic_read(wcount) <= 1)) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); - gfs2_qa_delete(ip, wcount); + gfs2_qa_put(ip); } /** diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a6bf8f1e083e..68d934fa0f9f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1401,6 +1401,8 @@ out_unlock: fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: truncate_inode_pages_final(&inode->i_data); + if (ip->i_qadata) + gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rsqa_delete(ip, NULL); gfs2_ordered_del_inode(ip); clear_inode(inode); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index c4fbb96e001f..9d7667bc4292 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1222,7 +1222,7 @@ static int gfs2_xattr_set(const struct xattr_handler *handler, struct gfs2_holder gh; int ret; - ret = gfs2_qa_alloc(ip); + ret = gfs2_qa_get(ip); if (ret) return ret; @@ -1231,15 +1231,19 @@ static int gfs2_xattr_set(const struct xattr_handler *handler, if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) - return ret; + goto out; } else { - if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) - return -EIO; + if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) { + ret = -EIO; + goto out; + } gfs2_holder_mark_uninitialized(&gh); } ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags); if (gfs2_holder_initialized(&gh)) gfs2_glock_dq_uninit(&gh); +out: + gfs2_qa_put(ip); return ret; } -- cgit v1.2.3 From 1595548fe72ca834abe75fb3df47e300a087d563 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 Mar 2020 10:32:35 -0600 Subject: gfs2: Split gfs2_rsqa_delete into gfs2_rs_delete and gfs2_qa_put Keeping reservations and quotas separate helps reviewing the code. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/bmap.c | 3 ++- fs/gfs2/file.c | 6 ++++-- fs/gfs2/inode.c | 3 ++- fs/gfs2/rgrp.c | 5 ++--- fs/gfs2/rgrp.h | 2 +- fs/gfs2/super.c | 3 ++- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d510a453dfa8..936a8ec6b48e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2194,7 +2194,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rsqa_delete(ip, NULL); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f18876cdfb0f..99a75e779ac0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -701,8 +701,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (file->f_mode & FMODE_WRITE) - gfs2_rsqa_delete(ip, &inode->i_writecount); + if (file->f_mode & FMODE_WRITE) { + gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_qa_put(ip); + } return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d1a24753c55f..980a6feb0e22 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -781,7 +781,8 @@ fail_free_inode: glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); } - gfs2_rsqa_delete(ip, NULL); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); posix_acl_release(acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 04e3e13a230c..692dc11d0f13 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -662,18 +662,17 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) } /** - * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation * @wcount: The inode's write count, or NULL * */ -void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); if ((wcount == NULL) || (atomic_read(wcount) <= 1)) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); - gfs2_qa_put(ip); } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 92cebb785996..a1d7e14fc55b 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 68d934fa0f9f..37fc41632aa2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1403,7 +1403,8 @@ out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rsqa_delete(ip, NULL); + gfs2_rs_delete(ip, NULL); + gfs2_qa_put(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); -- cgit v1.2.3 From 4bd684bc0143b46eea6dcf4dd3a73dfd6dc7247c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 6 Mar 2020 10:51:41 -0600 Subject: gfs2: Remove unnecessary gfs2_qa_{get,put} pairs We now get the quota data structure when opening a file writable and put it when closing that writable file descriptor, so there no longer is a need for gfs2_qa_{get,put} while we're holding a writable file descriptor. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/file.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 99a75e779ac0..fe305e4bfd37 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -458,10 +458,6 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); - ret = gfs2_qa_get(ip); - if (ret) - goto out; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -553,13 +549,11 @@ out_quota_unlock: out_unlock: gfs2_glock_dq(&gh); out_uninit: - gfs2_qa_put(ip); gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } -out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } @@ -860,10 +854,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct gfs2_inode *ip = GFS2_I(inode); ssize_t ret; - ret = gfs2_qa_get(ip); - if (ret) - return ret; - gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (iocb->ki_flags & IOCB_APPEND) { @@ -871,7 +861,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) - goto out; + return ret; gfs2_glock_dq_uninit(&gh); } @@ -929,8 +919,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) out_unlock: inode_unlock(inode); -out: - gfs2_qa_put(ip); return ret; } @@ -1162,18 +1150,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le if (mode & FALLOC_FL_PUNCH_HOLE) { ret = __gfs2_punch_hole(file, offset, len); } else { - ret = gfs2_qa_get(ip); - if (ret) - goto out_putw; - ret = __gfs2_fallocate(file, mode, offset, len); - if (ret) gfs2_rs_deltree(&ip->i_res); - gfs2_qa_put(ip); } -out_putw: put_write_access(inode); out_unlock: gfs2_glock_dq(&gh); @@ -1187,18 +1168,11 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - int error; - struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); ssize_t ret; - error = gfs2_qa_get(ip); - if (error) - return (ssize_t)error; - gfs2_size_hint(out, *ppos, len); ret = iter_file_splice_write(pipe, out, ppos, len, flags); - gfs2_qa_put(ip); return ret; } -- cgit v1.2.3 From e04d339bd8b1b6c92f3bce117d35e75c508424bf Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Mar 2020 12:18:21 -0500 Subject: gfs2: don't lock sd_log_flush_lock in try_rgrp_unlink In function try_rgrp_unlink, we added a temporary lock of the sd_log_flush_lock while searching the bitmaps. This protected us from problems in which dinodes being freed were still in a state of flux because the rgrp was in an active transaction. It was a kludge. Now that we've straightened out the code for inode eviction, deletes, and all the recovery mess, we no longer need this kludge. This patch removes it, and should improve performance. Signed-off-by: Bob Peterson --- fs/gfs2/rgrp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 692dc11d0f13..a321c34e3d6e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1806,10 +1806,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { - down_write(&sdp->sd_log_flush_lock); error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, true); - up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; if (WARN_ON_ONCE(error)) -- cgit v1.2.3 From 9592ea80ad13fe06d7848028af6c917aa1cd0aaa Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Mar 2020 12:19:54 -0500 Subject: gfs2: instrumentation wrt ail1 stuck Before this patch, if the ail1 flush got stuck for some reason, there were no clues as to why. This patch introduces a check for getting stuck for more than a minute, and if it happens, it dumps the items still remaining on the ail1 list. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 8729f5f3a98c..82f356f57045 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -139,6 +139,40 @@ __acquires(&sdp->sd_ail_lock) return ret; } +static void dump_ail_list(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n", + current->journal_info ? 1 : 0); + + list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry_reverse(bd, &tr->tr_ail1_list, + bd_ail_st_list) { + bh = bd->bd_bh; + fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd, + (unsigned long long)bd->bd_blkno, bh); + if (!bh) { + fs_err(sdp, "\n"); + continue; + } + fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d " + "map:%d new:%d ar:%d aw:%d delay:%d " + "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n", + (unsigned long long)bh->b_blocknr, + buffer_uptodate(bh), buffer_dirty(bh), + buffer_locked(bh), buffer_req(bh), + buffer_mapped(bh), buffer_new(bh), + buffer_async_read(bh), buffer_async_write(bh), + buffer_delay(bh), buffer_write_io_error(bh), + buffer_unwritten(bh), + buffer_defer_completion(bh), + buffer_pinned(bh), buffer_escaped(bh)); + } + } +} /** * gfs2_ail1_flush - start writeback of some ail1 entries @@ -155,11 +189,16 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct gfs2_trans *tr; struct blk_plug plug; int ret = 0; + unsigned long flush_start = jiffies; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: + if (time_after(jiffies, flush_start + (HZ * 600))) { + dump_ail_list(sdp); + goto out; + } list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; @@ -170,6 +209,7 @@ restart: break; } } +out: spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); if (ret) { -- cgit v1.2.3 From c953a735c7d4d0d1b092b5c594258a07a84149db Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Mar 2020 12:22:05 -0500 Subject: gfs2: change from write to read lock for sd_log_flush_lock in journal replay Function gfs2_recover_func grabs the sd_log_flush_lock rw_semaphore in write mode. This is unnecessary because we only need to prevent log flush from using sd_log_bio bio while it does. Therefore, a read lock will be enough. This is a small step in cleaning up log flush. Signed-off-by: Bob Peterson --- fs/gfs2/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 34dfdb211439..96c345f49273 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -401,7 +401,7 @@ void gfs2_recover_func(struct work_struct *work) /* We take the sd_log_flush_lock here primarily to prevent log * flushes and simultaneous journal replays from stomping on * each other wrt sd_log_bio. */ - down_write(&sdp->sd_log_flush_lock); + down_read(&sdp->sd_log_flush_lock); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); error = foreach_descriptor(jd, head.lh_tail, @@ -412,7 +412,7 @@ void gfs2_recover_func(struct work_struct *work) } clean_journal(jd, &head); - up_write(&sdp->sd_log_flush_lock); + up_read(&sdp->sd_log_flush_lock); gfs2_glock_dq_uninit(&thaw_gh); t_rep = ktime_get(); -- cgit v1.2.3 From 75b46c437f6b0f8e37032a407c7373f85f5c26a8 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 27 Mar 2020 15:23:14 -0500 Subject: gfs2: Fix oversight in gfs2_ail1_flush Ordinarily, function gfs2_ail1_start_one issues a write request for one item on the ail1 list, then returns -EBUSY. This makes the caller, gfs2_ail1_flush, loop around and start another. However, it was not clearing the -EBUSY return code each time through the loop. So on rare occasions, like when the wbc runs out of nr_to_write, it remained set to -EBUSY, which triggered an error and withdraw. This patch sets the return code to 0 each time through the restart loop so this won't happen anymore. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 82f356f57045..3a75843ae580 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -188,13 +188,14 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; struct blk_plug plug; - int ret = 0; + int ret; unsigned long flush_start = jiffies; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: + ret = 0; if (time_after(jiffies, flush_start + (HZ * 600))) { dump_ail_list(sdp); goto out; -- cgit v1.2.3