From d0f17d3883f1e3f085d38572c2ea8edbd5150172 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 10:10:01 -0500 Subject: gfs2: Free rd_bits later in gfs2_clear_rgrpd to fix use-after-free Function gfs2_clear_rgrpd calls kfree(rgd->rd_bits) before calling return_all_reservations, but return_all_reservations still dereferences rgd->rd_bits in __rs_deltree. Fix that by moving the call to kfree below the call to return_all_reservations. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ee491bb9c1cc..eb1b29734b7f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -719,9 +719,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) } gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } -- cgit v1.2.3 From a9dd945ccef07a904e412f208f8de708a3d7159e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 10:10:02 -0500 Subject: gfs2: Add missing truncate_inode_pages_final for sd_aspace Gfs2 creates an address space for its rgrps called sd_aspace, but it never called truncate_inode_pages_final on it. This confused vfs greatly which tried to reference the address space after gfs2 had freed the superblock that contained it. This patch adds a call to truncate_inode_pages_final for sd_aspace, thus avoiding the use-after-free. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b285192bd6b3..b3d951ab8068 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -738,6 +738,7 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); -- cgit v1.2.3 From c4af59bd441f90e185a652cce1aaf38dea293bf2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 16:08:07 -0400 Subject: gfs2: init_journal's undo directive should also undo the statfs inodes Hi, Before this patch, function init_journal's "undo" directive jumped to label fail_jinode_gh. But now that it does statfs initialization, it needs to jump to fail_statfs instead. Failure to do so means that mount failures after init_journal is successful will neglect to let go of the proper statfs information, stranding the statfs_changeX inodes. This makes it impossible to free its glocks, and results in: gfs2: fsid=sda.s: G: s:EX n:2/805f f:Dqob t:EX d:UN/603701000 a:0 v:0 r:4 m:200 p:1 gfs2: fsid=sda.s: H: s:EX f:H e:0 p:1397947 [(ended)] init_journal+0x548/0x890 [gfs2] gfs2: fsid=sda.s: I: n:6/32863 t:8 f:0x00 d:0x00000201 s:24 p:0 gfs2: fsid=sda.s: G: s:SH n:5/805f f:Dqob t:SH d:UN/603712000 a:0 v:0 r:3 m:200 p:0 gfs2: fsid=sda.s: H: s:SH f:EH e:0 p:1397947 [(ended)] gfs2_inode_lookup+0x1fb/0x410 [gfs2] VFS: Busy inodes after unmount of sda. Self-destruct in 5 seconds. Have a nice day... The next time the file system is mounted, it then reuses the same glocks, which ends in a kernel NULL pointer dereference when trying to dump the reused glock. This patch makes the "undo" function of init_journal jump to fail_statfs so the statfs files are properly deconstructed upon failure. Fixes: 97fd734ba17e ("gfs2: lookup local statfs inodes prior to journal recovery") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7a7e3c10a9a9..1ed4b61e3298 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -704,7 +704,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (undo) { jindex = 0; - goto fail_jinode_gh; + goto fail_statfs; } sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); -- cgit v1.2.3 From 4a55752ae288caaef8df4c5f4e07393c127bb9f0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 12:29:37 -0500 Subject: gfs2: Split up gfs2_meta_sync into inode and rgrp versions Before this patch, function gfs2_meta_sync called filemap_fdatawrite to write the address space for the metadata being synced. That's great for inodes, but resource groups all point to the same superblock-address space, sdp->sd_aspace. Each rgrp has its own range of blocks on which it should operate. That meant every time an rgrp's metadata was synced, it would write all of them instead of just the range. This patch eliminates function gfs2_meta_sync and tailors specific metasync functions for inodes and rgrps. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 56 +++++++++++++++++++++++++++++++++++++++++++----------- fs/gfs2/glops.h | 1 + fs/gfs2/lops.c | 31 +++++------------------------- fs/gfs2/lops.h | 2 -- fs/gfs2/recovery.c | 2 +- 5 files changed, 52 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index aa3f5236befb..6c1432d78dce 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -164,6 +164,31 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) GFS2_LFC_AIL_FLUSH); } +/** + * gfs2_rgrp_metasync - sync out the metadata of a resource group + * @gl: the glock protecting the resource group + * + */ + +static int gfs2_rgrp_metasync(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct address_space *metamapping = &sdp->sd_aspace; + struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); + const unsigned bsize = sdp->sd_sb.sb_bsize; + loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; + loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; + int error; + + filemap_fdatawrite_range(metamapping, start, end); + error = filemap_fdatawait_range(metamapping, start, end); + WARN_ON_ONCE(error && !gfs2_withdrawn(sdp)); + mapping_set_error(metamapping, error); + if (error) + gfs2_io_error(sdp); + return error; +} + /** * rgrp_go_sync - sync out the metadata for this glock * @gl: the glock @@ -176,11 +201,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static int rgrp_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); - const unsigned bsize = sdp->sd_sb.sb_bsize; - loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; - loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; int error; if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) @@ -189,10 +210,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl) gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_RGRP_GO_SYNC); - filemap_fdatawrite_range(mapping, start, end); - error = filemap_fdatawait_range(mapping, start, end); - WARN_ON_ONCE(error && !gfs2_withdrawn(sdp)); - mapping_set_error(mapping, error); + error = gfs2_rgrp_metasync(gl); if (!error) error = gfs2_ail_empty_gl(gl); gfs2_free_clones(rgd); @@ -266,7 +284,24 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip) } /** - * inode_go_sync - Sync the dirty data and/or metadata for an inode glock + * gfs2_inode_metasync - sync out the metadata of an inode + * @gl: the glock protecting the inode + * + */ +int gfs2_inode_metasync(struct gfs2_glock *gl) +{ + struct address_space *metamapping = gfs2_glock2aspace(gl); + int error; + + filemap_fdatawrite(metamapping); + error = filemap_fdatawait(metamapping); + if (error) + gfs2_io_error(gl->gl_name.ln_sbd); + return error; +} + +/** + * inode_go_sync - Sync the dirty metadata of an inode * @gl: the glock protecting the inode * */ @@ -297,8 +332,7 @@ static int inode_go_sync(struct gfs2_glock *gl) error = filemap_fdatawait(mapping); mapping_set_error(mapping, error); } - ret = filemap_fdatawait(metamapping); - mapping_set_error(metamapping, ret); + ret = gfs2_inode_metasync(gl); if (!error) error = ret; gfs2_ail_empty_gl(gl); diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 2dd192e85618..695898afcaf1 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -22,6 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; +extern int gfs2_inode_metasync(struct gfs2_glock *gl); extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync); #endif /* __GLOPS_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index ed69298dd824..3922b26264f5 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -22,6 +22,7 @@ #include "incore.h" #include "inode.h" #include "glock.h" +#include "glops.h" #include "log.h" #include "lops.h" #include "meta_io.h" @@ -817,41 +818,19 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start, return error; } -/** - * gfs2_meta_sync - Sync all buffers associated with a glock - * @gl: The glock - * - */ - -void gfs2_meta_sync(struct gfs2_glock *gl) -{ - struct address_space *mapping = gfs2_glock2aspace(gl); - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - int error; - - if (mapping == NULL) - mapping = &sdp->sd_aspace; - - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - - if (error) - gfs2_io_error(gl->gl_name.ln_sbd); -} - static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); if (error) { - gfs2_meta_sync(ip->i_gl); + gfs2_inode_metasync(ip->i_gl); return; } if (pass != 1) return; - gfs2_meta_sync(ip->i_gl); + gfs2_inode_metasync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); @@ -1060,14 +1039,14 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); if (error) { - gfs2_meta_sync(ip->i_gl); + gfs2_inode_metasync(ip->i_gl); return; } if (pass != 1) return; /* data sync? */ - gfs2_meta_sync(ip->i_gl); + gfs2_inode_metasync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 4a3d8aecdf82..fbdbb08dcec6 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -27,8 +27,6 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); -extern void gfs2_meta_sync(struct gfs2_glock *gl); - static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { unsigned int limit; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index b5cbe21efdfb..c26c68ebd29d 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -349,7 +349,7 @@ static int update_statfs_inode(struct gfs2_jdesc *jd, mark_buffer_dirty(bh); brelse(bh); - gfs2_meta_sync(ip->i_gl); + gfs2_inode_metasync(ip->i_gl); out: return error; -- cgit v1.2.3 From 7e5b92669904c1de13070ab6d8b788eab9d0cf1f Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 28 Oct 2020 12:03:23 -0500 Subject: gfs2: don't initialize statfs_change inodes in spectator mode Before commit 97fd734ba17e, the local statfs_changeX inode was never initialized for spectator mounts. However, it still checks for spectator mounts when unmounting everything. There's no good reason to lookup the statfs_changeX files because spectators cannot perform recovery. It still, however, needs the master statfs file for statfs calls. This patch adds the check for spectator mounts to init_statfs. Fixes: 97fd734ba17e ("gfs2: lookup local statfs inodes prior to journal recovery") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ed4b61e3298..61fce59cb4d3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -633,8 +633,10 @@ static int init_statfs(struct gfs2_sbd *sdp) if (IS_ERR(sdp->sd_statfs_inode)) { error = PTR_ERR(sdp->sd_statfs_inode); fs_err(sdp, "can't read in statfs inode: %d\n", error); - goto fail; + goto out; } + if (sdp->sd_args.ar_spectator) + goto out; pn = gfs2_lookup_simple(master, "per_node"); if (IS_ERR(pn)) { @@ -682,15 +684,17 @@ free_local: iput(pn); put_statfs: iput(sdp->sd_statfs_inode); -fail: +out: return error; } /* Uninitialize and free up memory used by the list of statfs inodes */ static void uninit_statfs(struct gfs2_sbd *sdp) { - gfs2_glock_dq_uninit(&sdp->sd_sc_gh); - free_local_statfs_inodes(sdp); + if (!sdp->sd_args.ar_spectator) { + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + free_local_statfs_inodes(sdp); + } iput(sdp->sd_statfs_inode); } -- cgit v1.2.3 From c5c68724696e7d2f8db58a5fce3673208d35c485 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 28 Oct 2020 13:42:18 -0500 Subject: gfs2: check for live vs. read-only file system in gfs2_fitrim Before this patch, gfs2_fitrim was not properly checking for a "live" file system. If the file system had something to trim and the file system was read-only (or spectator) it would start the trim, but when it starts the transaction, gfs2_trans_begin returns -EROFS (read-only file system) and it errors out. However, if the file system was already trimmed so there's no work to do, it never called gfs2_trans_begin. That code is bypassed so it never returns the error. Instead, it returns a good return code with 0 work. All this makes for inconsistent behavior: The same fstrim command can return -EROFS in one case and 0 in another. This tripped up xfstests generic/537 which reports the error as: +fstrim with unrecovered metadata just ate your filesystem This patch adds a check for a "live" (iow, active journal, iow, RW) file system, and if not, returns the error properly. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index eb1b29734b7f..92d799a193b8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1370,6 +1370,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + if (!blk_queue_discard(q)) return -EOPNOTSUPP; -- cgit v1.2.3 From 6bd1c7bd4ee7b17980cdc347522dcb76feac9b98 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 2 Nov 2020 21:11:30 +0100 Subject: gfs2: Don't call cancel_delayed_work_sync from within delete work function Right now, we can end up calling cancel_delayed_work_sync from within delete_work_func via gfs2_lookup_by_inum -> gfs2_inode_lookup -> gfs2_cancel_delete_work. When that happens, it will result in a deadlock. Instead, gfs2_inode_lookup should skip the call to gfs2_cancel_delete_work when called from delete_work_func (blktype == GFS2_BLKST_UNLINKED). Reported-by: Alexander Ahring Oder Aring Fixes: a0e3cc65fa29 ("gfs2: Turn gl_delete into a delayed work") Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6774865f5b5b..077ccb1b3ccc 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -180,7 +180,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; - gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); + if (blktype != GFS2_BLKST_UNLINKED) + gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl); glock_set_object(ip->i_iopen_gh.gh_gl, ip); gfs2_glock_put(io_gl); io_gl = NULL; -- cgit v1.2.3 From da7d554f7c62d0c17c1ac3cc2586473c2d99f0bd Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 26 Oct 2020 10:52:29 -0400 Subject: gfs2: Wake up when sd_glock_disposal becomes zero Commit fc0e38dae645 ("GFS2: Fix glock deallocation race") fixed a sd_glock_disposal accounting bug by adding a missing atomic_dec statement, but it failed to wake up sd_glock_wait when that decrement causes sd_glock_disposal to reach zero. As a consequence, gfs2_gl_hash_clear can now run into a 10-minute timeout instead of being woken up. Add the missing wakeup. Fixes: fc0e38dae645 ("GFS2: Fix glock deallocation race") Cc: stable@vger.kernel.org # v2.6.39+ Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5441c17562c5..d98a2e5dab9f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1078,7 +1078,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); out: return ret; -- cgit v1.2.3