From c52c47e4b4fbe4284602fc2ccbfc4a4d8dc05b49 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sat, 29 Apr 2017 20:12:16 -0400
Subject: jbd2: Fix lockdep splat with generic/270 test

I've hit a lockdep splat with generic/270 test complaining that:

3216.fsstress.b/3533 is trying to acquire lock:
 (jbd2_handle){++++..}, at: [<ffffffff813152e0>] jbd2_log_wait_commit+0x0/0x150

but task is already holding lock:
 (jbd2_handle){++++..}, at: [<ffffffff8130bd3b>] start_this_handle+0x35b/0x850

The underlying problem is that jbd2_journal_force_commit_nested()
(called from ext4_should_retry_alloc()) may get called while a
transaction handle is started. In such case it takes care to not wait
for commit of the running transaction (which would deadlock) but only
for a commit of a transaction that is already committing (which is safe
as that doesn't wait for any filesystem locks).

In fact there are also other callers of jbd2_log_wait_commit() that take
care to pass tid of a transaction that is already committing and for
those cases, the lockdep instrumentation is too restrictive and leading
to false positive reports. Fix the problem by calling
jbd2_might_wait_for_commit() from jbd2_log_wait_commit() only if the
transaction isn't already committing.

Fixes: 1eaa566d368b214d99cbb973647c1b0b8102a9ae
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/journal.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs/jbd2')
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5adc2fb62b0f..9410ec462ba6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -691,8 +691,21 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 {
 	int err = 0;
 
-	jbd2_might_wait_for_commit(journal);
 	read_lock(&journal->j_state_lock);
+#ifdef CONFIG_PROVE_LOCKING
+	/*
+	 * Some callers make sure transaction is already committing and in that
+	 * case we cannot block on open handles anymore. So don't warn in that
+	 * case.
+	 */
+	if (tid_gt(tid, journal->j_commit_sequence) &&
+	    (!journal->j_committing_transaction ||
+	     journal->j_committing_transaction->t_tid != tid)) {
+		read_unlock(&journal->j_state_lock);
+		jbd2_might_wait_for_commit(journal);
+		read_lock(&journal->j_state_lock);
+	}
+#endif
 #ifdef CONFIG_JBD2_DEBUG
 	if (!tid_geq(journal->j_commit_request, tid)) {
 		printk(KERN_ERR
-- 
cgit v1.2.3


From 5052b069acf73866d00077d8bc49983c3ee903e5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sat, 29 Apr 2017 21:07:30 -0400
Subject: jbd2: fix dbench4 performance regression for 'nobarrier' mounts

Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as
synchronous" removed REQ_SYNC flag from WRITE_FUA implementation. Since
JBD2 strips REQ_FUA and REQ_FLUSH flags from submitted IO when the
filesystem is mounted with nobarrier mount option, journal superblock
writes ended up being async writes after this patch and that caused
heavy performance regression for dbench4 benchmark with high number of
processes. In my test setup with HP RAID array with non-volatile write
cache and 32 GB ram, dbench4 runs with 8 processes regressed by ~25%.

Fix the problem by making sure journal superblock writes are always
treated as synchronous since they generally block progress of the
journalling machinery and thus the whole filesystem.

Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/journal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/jbd2')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9410ec462ba6..f1906fa54321 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1361,7 +1361,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
 	jbd2_superblock_csum_set(journal, sb);
 	get_bh(bh);
 	bh->b_end_io = end_buffer_write_sync;
-	ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
+	ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
 	wait_on_buffer(bh);
 	if (buffer_write_io_error(bh)) {
 		clear_buffer_write_io_error(bh);
-- 
cgit v1.2.3


From 17f423b5160767a8ec43b0602767e5f4d3ecd083 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 4 May 2017 11:01:31 -0400
Subject: jbd2: cleanup write flags handling from jbd2_write_superblock()

Currently jbd2_write_superblock() silently adds REQ_SYNC to flags with
which journal superblock is written. Make this explicit by making flags
passed down to jbd2_write_superblock() contain REQ_SYNC.

CC: linux-ext4@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/journal.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs/jbd2')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f1906fa54321..1ce13479b10d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -926,7 +926,8 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
 	 * space and if we lose sb update during power failure we'd replay
 	 * old transaction with possibly newly overwritten data.
 	 */
-	ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA);
+	ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
+					      REQ_SYNC | REQ_FUA);
 	if (ret)
 		goto out;
 
@@ -1327,7 +1328,7 @@ static int journal_reset(journal_t *journal)
 		jbd2_journal_update_sb_log_tail(journal,
 						journal->j_tail_sequence,
 						journal->j_tail,
-						REQ_FUA);
+						REQ_SYNC | REQ_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 	return jbd2_journal_start_thread(journal);
@@ -1361,7 +1362,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
 	jbd2_superblock_csum_set(journal, sb);
 	get_bh(bh);
 	bh->b_end_io = end_buffer_write_sync;
-	ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+	ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
 	wait_on_buffer(bh);
 	if (buffer_write_io_error(bh)) {
 		clear_buffer_write_io_error(bh);
@@ -1467,7 +1468,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
 	read_unlock(&journal->j_state_lock);
 
-	jbd2_write_superblock(journal, REQ_FUA);
+	jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
@@ -1734,7 +1735,7 @@ int jbd2_journal_destroy(journal_t *journal)
 			write_unlock(&journal->j_state_lock);
 
 			jbd2_mark_journal_empty(journal,
-					REQ_PREFLUSH | REQ_FUA);
+					REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
 			mutex_unlock(&journal->j_checkpoint_mutex);
 		} else
 			err = -EIO;
@@ -1993,7 +1994,7 @@ int jbd2_journal_flush(journal_t *journal)
 	 * the magic code for a fully-recovered superblock.  Any future
 	 * commits of data to the journal will restore the current
 	 * s_start value. */
-	jbd2_mark_journal_empty(journal, REQ_FUA);
+	jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
 	mutex_unlock(&journal->j_checkpoint_mutex);
 	write_lock(&journal->j_state_lock);
 	J_ASSERT(!journal->j_running_transaction);
@@ -2039,7 +2040,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 	if (write) {
 		/* Lock to make assertions happy... */
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_mark_journal_empty(journal, REQ_FUA);
+		jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
 
-- 
cgit v1.2.3