From c52c47e4b4fbe4284602fc2ccbfc4a4d8dc05b49 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Sat, 29 Apr 2017 20:12:16 -0400
Subject: jbd2: Fix lockdep splat with generic/270 test

I've hit a lockdep splat with generic/270 test complaining that:

3216.fsstress.b/3533 is trying to acquire lock:
 (jbd2_handle){++++..}, at: [] jbd2_log_wait_commit+0x0/0x150

but task is already holding lock:
 (jbd2_handle){++++..}, at: [] start_this_handle+0x35b/0x850

The underlying problem is that jbd2_journal_force_commit_nested()
(called from ext4_should_retry_alloc()) may get called while a
transaction handle is started. In such a case it takes care not to wait
for commit of the running transaction (which would deadlock) but only
for a commit of a transaction that is already committing (which is safe
as that doesn't wait for any filesystem locks).

In fact there are also other callers of jbd2_log_wait_commit() that
take care to pass the tid of a transaction that is already committing,
and for those cases the lockdep instrumentation is too restrictive,
leading to false positive reports. Fix the problem by calling
jbd2_might_wait_for_commit() from jbd2_log_wait_commit() only if the
transaction isn't already committing.

Fixes: 1eaa566d368b214d99cbb973647c1b0b8102a9ae
Signed-off-by: Jan Kara
Signed-off-by: Theodore Ts'o
---
 fs/jbd2/journal.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs/jbd2')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5adc2fb62b0f..9410ec462ba6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -691,8 +691,21 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 {
     int err = 0;
 
-    jbd2_might_wait_for_commit(journal);
     read_lock(&journal->j_state_lock);
+#ifdef CONFIG_PROVE_LOCKING
+    /*
+     * Some callers make sure transaction is already committing and in that
+     * case we cannot block on open handles anymore. So don't warn in that
+     * case.
+     */
+    if (tid_gt(tid, journal->j_commit_sequence) &&
+        (!journal->j_committing_transaction ||
+         journal->j_committing_transaction->t_tid != tid)) {
+        read_unlock(&journal->j_state_lock);
+        jbd2_might_wait_for_commit(journal);
+        read_lock(&journal->j_state_lock);
+    }
+#endif
 #ifdef CONFIG_JBD2_DEBUG
     if (!tid_geq(journal->j_commit_request, tid)) {
         printk(KERN_ERR
--
cgit v1.2.3
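A note on the check added above: it relies on tid_gt() to decide whether the
requested transaction ID is still ahead of the journal's last committed
sequence number, a comparison that must stay correct when the 32-bit tid
counter wraps around. Below is a minimal userspace sketch modelled on the
tid_gt() helper in include/linux/jbd2.h; the sample tid values are made up
for illustration and are not taken from the patch.

/*
 * Sketch of the wraparound-safe transaction ID comparison used by the
 * CONFIG_PROVE_LOCKING check in jbd2_log_wait_commit() above.
 */
#include <stdio.h>

typedef unsigned int tid_t;

static int tid_gt(tid_t x, tid_t y)
{
    int difference = (int)(x - y);  /* signed difference handles wraparound */
    return difference > 0;
}

int main(void)
{
    tid_t commit_sequence = 0xfffffffe; /* illustrative: last committed tid */
    tid_t tid = 2;                      /* illustrative: target tid after wrap */

    /* Target tid not committed yet -> waiting could really block. */
    printf("tid_gt(%u, %u) = %d\n", tid, commit_sequence,
           tid_gt(tid, commit_sequence));
    /* Target tid already committed -> no reason to warn about blocking. */
    printf("tid_gt(%u, %u) = %d\n", commit_sequence, tid,
           tid_gt(commit_sequence, tid));
    return 0;
}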
From 5052b069acf73866d00077d8bc49983c3ee903e5 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Sat, 29 Apr 2017 21:07:30 -0400
Subject: jbd2: fix dbench4 performance regression for 'nobarrier' mounts

Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as
synchronous" removed the REQ_SYNC flag from the WRITE_FUA
implementation. Since JBD2 strips the REQ_FUA and REQ_FLUSH flags from
submitted IO when the filesystem is mounted with the nobarrier mount
option, journal superblock writes ended up being async writes after
this patch, and that caused a heavy performance regression for the
dbench4 benchmark with a high number of processes. In my test setup
with an HP RAID array with non-volatile write cache and 32 GB RAM,
dbench4 runs with 8 processes regressed by ~25%.

Fix the problem by making sure journal superblock writes are always
treated as synchronous, since they generally block progress of the
journalling machinery and thus the whole filesystem.

Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara
Signed-off-by: Theodore Ts'o
---
 fs/jbd2/journal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/jbd2')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9410ec462ba6..f1906fa54321 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1361,7 +1361,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
     jbd2_superblock_csum_set(journal, sb);
     get_bh(bh);
     bh->b_end_io = end_buffer_write_sync;
-    ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
+    ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
     wait_on_buffer(bh);
     if (buffer_write_io_error(bh)) {
         clear_buffer_write_io_error(bh);
--
cgit v1.2.3
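A note on why the superblock write turned async: after commit b685d3d65ac7
the block layer treats a write as synchronous when any of REQ_SYNC, REQ_FUA
or REQ_PREFLUSH is set (roughly what op_is_sync() checks), instead of WRITE_FUA
implying REQ_SYNC. With 'nobarrier', JBD2 strips the FUA/PREFLUSH bits before
submission, so nothing synchronous was left. The userspace model below is a
rough sketch of that interaction; the flag bit values are invented for the
example and are not the kernel's.

/*
 * Rough model of the 'nobarrier' regression: once FUA/PREFLUSH are
 * stripped, only an explicit REQ_SYNC keeps the write synchronous.
 */
#include <stdbool.h>
#include <stdio.h>

#define REQ_SYNC     (1u << 0)   /* illustrative bit values only */
#define REQ_FUA      (1u << 1)
#define REQ_PREFLUSH (1u << 2)

static bool write_is_sync(unsigned int flags)
{
    /* Mirrors the spirit of op_is_sync() after b685d3d65ac7. */
    return flags & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH);
}

int main(void)
{
    unsigned int flags = REQ_FUA;            /* what callers used to pass */

    flags &= ~(REQ_FUA | REQ_PREFLUSH);      /* 'nobarrier' strips these */
    printf("without REQ_SYNC: sync=%d\n", write_is_sync(flags));

    /* The fix: journal superblock writes always carry REQ_SYNC. */
    printf("with REQ_SYNC:    sync=%d\n", write_is_sync(flags | REQ_SYNC));
    return 0;
}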
From 17f423b5160767a8ec43b0602767e5f4d3ecd083 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Thu, 4 May 2017 11:01:31 -0400
Subject: jbd2: cleanup write flags handling from jbd2_write_superblock()

Currently jbd2_write_superblock() silently adds REQ_SYNC to the flags
with which the journal superblock is written. Make this explicit by
having the flags passed down to jbd2_write_superblock() contain
REQ_SYNC.

CC: linux-ext4@vger.kernel.org
Signed-off-by: Jan Kara
Signed-off-by: Theodore Ts'o
---
 fs/jbd2/journal.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs/jbd2')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f1906fa54321..1ce13479b10d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -926,7 +926,8 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
      * space and if we lose sb update during power failure we'd replay
      * old transaction with possibly newly overwritten data.
      */
-    ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA);
+    ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
+                                          REQ_SYNC | REQ_FUA);
     if (ret)
         goto out;
 
@@ -1327,7 +1328,7 @@ static int journal_reset(journal_t *journal)
         jbd2_journal_update_sb_log_tail(journal,
                         journal->j_tail_sequence,
                         journal->j_tail,
-                        REQ_FUA);
+                        REQ_SYNC | REQ_FUA);
         mutex_unlock(&journal->j_checkpoint_mutex);
     }
     return jbd2_journal_start_thread(journal);
@@ -1361,7 +1362,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
     jbd2_superblock_csum_set(journal, sb);
     get_bh(bh);
     bh->b_end_io = end_buffer_write_sync;
-    ret = submit_bh(REQ_OP_WRITE, write_flags | REQ_SYNC, bh);
+    ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
     wait_on_buffer(bh);
     if (buffer_write_io_error(bh)) {
         clear_buffer_write_io_error(bh);
@@ -1467,7 +1468,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
     sb->s_errno = cpu_to_be32(journal->j_errno);
     read_unlock(&journal->j_state_lock);
 
-    jbd2_write_superblock(journal, REQ_FUA);
+    jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
@@ -1734,7 +1735,7 @@ int jbd2_journal_destroy(journal_t *journal)
             write_unlock(&journal->j_state_lock);
 
             jbd2_mark_journal_empty(journal,
-                    REQ_PREFLUSH | REQ_FUA);
+                    REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
             mutex_unlock(&journal->j_checkpoint_mutex);
         } else
             err = -EIO;
@@ -1993,7 +1994,7 @@ int jbd2_journal_flush(journal_t *journal)
      * the magic code for a fully-recovered superblock. Any future
      * commits of data to the journal will restore the current
      * s_start value. */
-    jbd2_mark_journal_empty(journal, REQ_FUA);
+    jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
     mutex_unlock(&journal->j_checkpoint_mutex);
     write_lock(&journal->j_state_lock);
     J_ASSERT(!journal->j_running_transaction);
@@ -2039,7 +2040,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
     if (write) {
         /* Lock to make assertions happy... */
         mutex_lock(&journal->j_checkpoint_mutex);
-        jbd2_mark_journal_empty(journal, REQ_FUA);
+        jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
         mutex_unlock(&journal->j_checkpoint_mutex);
     }
 
--
cgit v1.2.3
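A short note on the cleanup above: it is purely a readability change. Instead
of jbd2_write_superblock() ORing in REQ_SYNC behind the callers' backs, every
caller now spells out the complete set of request flags, so the intended IO
semantics (sync, flush, FUA) are visible at each call site. An illustrative
(hypothetical, not from the patch) call following the convention:

    /* A new caller would state all request flags explicitly, e.g.: */
    jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);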