From aa3c0c61f62d682259e3e66cdc01846290f9cd6c Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:38 -0300 Subject: jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers() Export functions that implement the current behavior done for an inode in journal_submit|finish_inode_data_buffers(). No functional change. Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-2-mfo@canonical.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6d2da8ad0e6f..f79b86b4241f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -187,19 +187,17 @@ static int journal_wait_on_commit_record(journal_t *journal, * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ -static int journal_submit_inode_data_buffers(struct address_space *mapping, - loff_t dirty_start, loff_t dirty_end) +int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) { - int ret; + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = dirty_start, - .range_end = dirty_end, + .range_start = jinode->i_dirty_start, + .range_end = jinode->i_dirty_end, }; - ret = generic_writepages(mapping, &wbc); - return ret; + return generic_writepages(mapping, &wbc); } /* @@ -215,16 +213,11 @@ static int journal_submit_data_buffers(journal_t *journal, { struct jbd2_inode *jinode; int err, ret = 0; - struct address_space *mapping; spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - loff_t dirty_start = jinode->i_dirty_start; - loff_t dirty_end = jinode->i_dirty_end; - if (!(jinode->i_flags & JI_WRITE_DATA)) continue; - mapping = jinode->i_vfs_inode->i_mapping; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); /* @@ -234,8 +227,7 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping, dirty_start, - dirty_end); + err = jbd2_journal_submit_inode_data_buffers(jinode); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -248,6 +240,15 @@ static int journal_submit_data_buffers(journal_t *journal, return ret; } +int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; + + return filemap_fdatawait_range_keep_errors(mapping, + jinode->i_dirty_start, + jinode->i_dirty_end); +} + /* * Wait for data submitted for writeout, refile inodes to proper * transaction if needed. @@ -262,16 +263,11 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { - loff_t dirty_start = jinode->i_dirty_start; - loff_t dirty_end = jinode->i_dirty_end; - if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_range_keep_errors( - jinode->i_vfs_inode->i_mapping, dirty_start, - dirty_end); + err = jbd2_journal_finish_inode_data_buffers(jinode); if (!ret) ret = err; spin_lock(&journal->j_list_lock); -- cgit v1.2.3 From 342af94ec6c02aa478fe2adcd41b950e154b03ba Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 5 Oct 2020 21:48:39 -0300 Subject: jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers() Introduce journal callbacks to allow different behaviors for an inode in journal_submit|finish_inode_data_buffers(). The existing users of the current behavior (ext4, ocfs2) are adapted to use the previously exported functions that implement the current behavior. Users are callers of jbd2_journal_inode_ranged_write|wait(), which adds the inode to the transaction's inode list with the JI_WRITE|WAIT_DATA flags. Only ext4 and ocfs2 in-tree. Both CONFIG_EXT4_FS and CONFIG_OCSFS2_FS select CONFIG_JBD2, which builds fs/jbd2/commit.c and journal.c that define and export the functions, so we can call directly in ext4/ocfs2. Signed-off-by: Mauricio Faria de Oliveira Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20201006004841.600488-3-mfo@canonical.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 4 ++++ fs/jbd2/commit.c | 30 ++++++++++++++++++------------ fs/ocfs2/journal.c | 4 ++++ include/linux/jbd2.h | 25 ++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 13 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0ee673e8e261..a3e57f554f1b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4752,6 +4752,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; + sbi->s_journal->j_submit_inode_data_buffers = + jbd2_journal_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + jbd2_journal_finish_inode_data_buffers; no_journal: if (!test_opt(sb, NO_MBCACHE)) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f79b86b4241f..6252b4c50666 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -197,6 +197,12 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) .range_end = jinode->i_dirty_end, }; + /* + * submit the inode data buffers. We use writepage + * instead of writepages. Because writepages can do + * block allocation with delalloc. We need to write + * only allocated blocks here. + */ return generic_writepages(mapping, &wbc); } @@ -220,16 +226,13 @@ static int journal_submit_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - /* - * submit the inode data buffers. We use writepage - * instead of writepages. Because writepages can do - * block allocation with delalloc. We need to write - * only allocated blocks here. - */ + /* submit the inode data buffers. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = jbd2_journal_submit_inode_data_buffers(jinode); - if (!ret) - ret = err; + if (journal->j_submit_inode_data_buffers) { + err = journal->j_submit_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); jinode->i_flags &= ~JI_COMMIT_RUNNING; @@ -267,9 +270,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = jbd2_journal_finish_inode_data_buffers(jinode); - if (!ret) - ret = err; + /* wait for the inode data buffers writeout. */ + if (journal->j_finish_inode_data_buffers) { + err = journal->j_finish_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b425f0b01dce..b9a9d69dde7e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -883,6 +883,10 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) OCFS2_JOURNAL_DIRTY_FL); journal->j_journal = j_journal; + journal->j_journal->j_submit_inode_data_buffers = + jbd2_journal_submit_inode_data_buffers; + journal->j_journal->j_finish_inode_data_buffers = + jbd2_journal_finish_inode_data_buffers; journal->j_inode = inode; journal->j_bh = bh; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8b7b06066bc2..04afa6dcd60d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -629,7 +629,9 @@ struct transaction_s struct journal_head *t_shadow_list; /* - * List of inodes whose data we've modified in data=ordered mode. + * List of inodes associated with the transaction; e.g., ext4 uses + * this to track inodes in data=ordered and data=journal mode that + * need special handling on transaction commit; also used by ocfs2. * [j_list_lock] */ struct list_head t_inode_list; @@ -1111,6 +1113,27 @@ struct journal_s void (*j_commit_callback)(journal_t *, transaction_t *); + /** + * @j_submit_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WRITE_DATA flag + * before we start to write out the transaction to the journal. + */ + int (*j_submit_inode_data_buffers) + (struct jbd2_inode *); + + /** + * @j_finish_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WAIT_DATA flag + * after we have written the transaction to the journal + * but before we write out the commit block. + */ + int (*j_finish_inode_data_buffers) + (struct jbd2_inode *); + /* * Journal statistics */ -- cgit v1.2.3 From ff780b91efe901b8eecd8114785abae5341820ad Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 15 Oct 2020 13:37:56 -0700 Subject: jbd2: add fast commit machinery This functions adds necessary APIs needed in JBD2 layer for fast commits. Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201015203802.3597742-5-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 8 +++ fs/jbd2/commit.c | 44 ++++++++++++ fs/jbd2/journal.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/jbd2.h | 27 +++++++ 4 files changed, 268 insertions(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0dad8bdb1253..f2d11b4c6b62 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -8,11 +8,19 @@ * Ext4 fast commits routines. */ #include "ext4_jbd2.h" +/* + * Fast commit cleanup routine. This is called after every fast commit and + * full commit. full is true if we are called after a full commit. + */ +static void ext4_fc_cleanup(journal_t *journal, int full) +{ +} void ext4_fc_init(struct super_block *sb, journal_t *journal) { if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) return; + journal->j_fc_cleanup_callback = ext4_fc_cleanup; if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) { pr_warn("Error while enabling fast commits, turning off."); ext4_clear_feature_fast_commit(sb); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6252b4c50666..fa688e163a80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -206,6 +206,30 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) return generic_writepages(mapping, &wbc); } +/* Send all the data buffers related to an inode */ +int jbd2_submit_inode_data(struct jbd2_inode *jinode) +{ + + if (!jinode || !(jinode->i_flags & JI_WRITE_DATA)) + return 0; + + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); + return jbd2_journal_submit_inode_data_buffers(jinode); + +} +EXPORT_SYMBOL(jbd2_submit_inode_data); + +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode) +{ + if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) || + !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping) + return 0; + return filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start, + jinode->i_dirty_end); +} +EXPORT_SYMBOL(jbd2_wait_inode_data); + /* * Submit all the data buffers of inode associated with the transaction to * disk. @@ -415,6 +439,20 @@ void jbd2_journal_commit_transaction(journal_t *journal) J_ASSERT(journal->j_running_transaction != NULL); J_ASSERT(journal->j_committing_transaction == NULL); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + write_lock(&journal->j_state_lock); + finish_wait(&journal->j_fc_wait, &wait); + } + write_unlock(&journal->j_state_lock); + commit_transaction = journal->j_running_transaction; trace_jbd2_start_commit(journal, commit_transaction); @@ -422,6 +460,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_tid); write_lock(&journal->j_state_lock); + journal->j_fc_off = 0; J_ASSERT(commit_transaction->t_state == T_RUNNING); commit_transaction->t_state = T_LOCKED; @@ -1121,12 +1160,16 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 1); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING; + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; spin_lock(&journal->j_list_lock); commit_transaction->t_state = T_FINISHED; /* Check if the transaction can be dropped now that we are finished */ @@ -1138,6 +1181,7 @@ restart_loop: spin_unlock(&journal->j_list_lock); write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); + wake_up(&journal->j_fc_wait); /* * Calculate overall stats diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 4497bfbac527..0c7c42bd530f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -159,7 +159,9 @@ static void commit_timeout(struct timer_list *t) * * 1) COMMIT: Every so often we need to commit the current state of the * filesystem to disk. The journal thread is responsible for writing - * all of the metadata buffers to disk. + * all of the metadata buffers to disk. If a fast commit is ongoing + * journal thread waits until it's done and then continues from + * there on. * * 2) CHECKPOINT: We cannot reuse a used section of the log file until all * of the data in that part of the log has been rewritten elsewhere on @@ -716,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) return err; } +/* + * Start a fast commit. If there's an ongoing fast or full commit wait for + * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY + * if a fast commit is not needed, either because there's an already a commit + * going on or this tid has already been committed. Returns -EINVAL if no jbd2 + * commit has yet been performed. + */ +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) +{ + /* + * Fast commits only allowed if at least one full commit has + * been processed. + */ + if (!journal->j_stats.ts_tid) + return -EINVAL; + + if (tid <= journal->j_commit_sequence) + return -EALREADY; + + write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_fc_wait, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_fc_wait, &wait); + return -EALREADY; + } + journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_begin_commit); + +/* + * Stop a fast commit. If fallback is set, this function starts commit of + * TID tid before any other fast commit can start. + */ +static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) +{ + if (journal->j_fc_cleanup_callback) + journal->j_fc_cleanup_callback(journal, 0); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; + if (fallback) + journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; + write_unlock(&journal->j_state_lock); + wake_up(&journal->j_fc_wait); + if (fallback) + return jbd2_complete_transaction(journal, tid); + return 0; +} + +int jbd2_fc_end_commit(journal_t *journal) +{ + return __jbd2_fc_end_commit(journal, 0, 0); +} +EXPORT_SYMBOL(jbd2_fc_end_commit); + +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid) +{ + return __jbd2_fc_end_commit(journal, tid, 1); +} +EXPORT_SYMBOL(jbd2_fc_end_commit_fallback); + /* Return 1 when transaction with given tid has already committed. */ int jbd2_transaction_committed(journal_t *journal, tid_t tid) { @@ -784,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) return jbd2_journal_bmap(journal, blocknr, retp); } +/* Map one fast commit buffer for use by the file system */ +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) +{ + unsigned long long pblock; + unsigned long blocknr; + int ret = 0; + struct buffer_head *bh; + int fc_off; + + *bh_out = NULL; + write_lock(&journal->j_state_lock); + + if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { + fc_off = journal->j_fc_off; + blocknr = journal->j_fc_first + fc_off; + journal->j_fc_off++; + } else { + ret = -EINVAL; + } + write_unlock(&journal->j_state_lock); + + if (ret) + return ret; + + ret = jbd2_journal_bmap(journal, blocknr, &pblock); + if (ret) + return ret; + + bh = __getblk(journal->j_dev, pblock, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + lock_buffer(bh); + + clear_buffer_uptodate(bh); + set_buffer_dirty(bh); + unlock_buffer(bh); + journal->j_fc_wbuf[fc_off] = bh; + + *bh_out = bh; + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_get_buf); + +/* + * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf + * for completion. + */ +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) +{ + struct buffer_head *bh; + int i, j_fc_off; + + read_lock(&journal->j_state_lock); + j_fc_off = journal->j_fc_off; + read_unlock(&journal->j_state_lock); + + /* + * Wait in reverse order to minimize chances of us being woken up before + * all IOs have completed + */ + for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { + bh = journal->j_fc_wbuf[i]; + wait_on_buffer(bh); + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + if (unlikely(!buffer_uptodate(bh))) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_wait_bufs); + +/* + * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf + * for completion. + */ +int jbd2_fc_release_bufs(journal_t *journal) +{ + struct buffer_head *bh; + int i, j_fc_off; + + read_lock(&journal->j_state_lock); + j_fc_off = journal->j_fc_off; + read_unlock(&journal->j_state_lock); + + /* + * Wait in reverse order to minimize chances of us being woken up before + * all IOs have completed + */ + for (i = j_fc_off - 1; i >= 0; i--) { + bh = journal->j_fc_wbuf[i]; + if (!bh) + break; + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; + } + + return 0; +} +EXPORT_SYMBOL(jbd2_fc_release_bufs); + /* * Conversion of logical to physical block numbers for the journal * @@ -1142,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev, init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); init_waitqueue_head(&journal->j_wait_reserved); + init_waitqueue_head(&journal->j_fc_wait); mutex_init(&journal->j_abort_mutex); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); @@ -1495,6 +1671,7 @@ out: static void jbd2_mark_journal_empty(journal_t *journal, int write_op) { journal_superblock_t *sb = journal->j_superblock; + bool had_fast_commit = false; BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); lock_buffer(journal->j_sb_buffer); @@ -1508,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_start = cpu_to_be32(0); + if (jbd2_has_feature_fast_commit(journal)) { + /* + * When journal is clean, no need to commit fast commit flag and + * make file system incompatible with older kernels. + */ + jbd2_clear_feature_fast_commit(journal); + had_fast_commit = true; + } jbd2_write_superblock(journal, write_op); + if (had_fast_commit) + jbd2_set_feature_fast_commit(journal); + /* Log is no longer empty */ write_lock(&journal->j_state_lock); journal->j_flags |= JBD2_FLUSHED; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 008629b4d615..a009d9b9c620 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -861,6 +861,13 @@ struct journal_s */ wait_queue_head_t j_wait_reserved; + /** + * @j_fc_wait: + * + * Wait queue to wait for completion of async fast commits. + */ + wait_queue_head_t j_fc_wait; + /** * @j_checkpoint_mutex: * @@ -1232,6 +1239,15 @@ struct journal_s */ struct lockdep_map j_trans_commit_map; #endif + + /** + * @j_fc_cleanup_callback: + * + * Clean-up after fast commit or full commit. JBD2 calls this function + * after every commit operation. + */ + void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + }; #define jbd2_might_wait_for_commit(j) \ @@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ +#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ /* * Function declarations for the journaling transaction and buffer @@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_init(journal_t *journal, int num_fc_blks); +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit(journal_t *journal); +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); +int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); +int jbd2_fc_release_bufs(journal_t *journal); + /* * is_journal_abort * -- cgit v1.2.3 From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/fsmap.c | 2 +- fs/ext4/super.c | 2 +- fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 12 ++++++------ fs/jbd2/recovery.c | 6 +++--- fs/ocfs2/journal.c | 2 +- include/linux/jbd2.h | 9 +++++++-- 7 files changed, 20 insertions(+), 15 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index b232c2767534..4c2a9fe30067 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys, /* Fabricate an rmap entry for the external log device. */ irec.fmr_physical = journal->j_blk_offset; - irec.fmr_length = journal->j_maxlen; + irec.fmr_length = journal->j_total_len; irec.fmr_owner = EXT4_FMR_OWN_LOG; irec.fmr_flags = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 738a6dd4957d..8a6dd433bb70 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3976,7 +3976,7 @@ int ext4_calculate_overhead(struct super_block *sb) * loaded or not */ if (sbi->s_journal && !sbi->s_journal_bdev) - overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len); else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { /* j_inum for internal journal is non-zero */ j_inode = ext4_get_journal_inode(sb, j_inum); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index fa688e163a80..ec516490cb35 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -801,7 +801,7 @@ start_journal_io: if (first_block < journal->j_tail) freed += journal->j_last - journal->j_first; /* Update tail only if we free significant amount of space */ - if (freed < journal->j_maxlen / 4) + if (freed < jbd2_journal_get_max_txn_bufs(journal)) update_tail = 0; } J_ASSERT(commit_transaction->t_state == T_COMMIT); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0c7c42bd530f..c3c768248527 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1348,7 +1348,7 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_dev = bdev; journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; - journal->j_maxlen = len; + journal->j_total_len = len; /* We need enough buffers to write out full descriptor block. */ n = journal->j_blocksize / jbd2_min_tag_size(); journal->j_wbufsize = n; @@ -1531,7 +1531,7 @@ static int journal_reset(journal_t *journal) journal->j_commit_sequence = journal->j_transaction_sequence - 1; journal->j_commit_request = journal->j_commit_sequence; - journal->j_max_transaction_buffers = journal->j_maxlen / 4; + journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); /* * As a special case, if the on-disk copy is already marked as needing @@ -1792,15 +1792,15 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { + if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) + journal->j_total_len = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { printk(KERN_WARNING "JBD2: journal file too short\n"); goto out; } if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + be32_to_cpu(sb->s_first) >= journal->j_total_len) { printk(KERN_WARNING "JBD2: Invalid start block of journal: %u\n", be32_to_cpu(sb->s_first)); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index eb2606133cd8..dc0694fcfcd1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start) /* Do up to 128K of readahead */ max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; + if (max > journal->j_total_len) + max = journal->j_total_len; /* Do the readahead itself. We'll submit MAXBUF buffer_heads at * a time to the block device IO layer. */ @@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, *bhp = NULL; - if (offset >= journal->j_maxlen) { + if (offset >= journal->j_total_len) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); return -EFSCORRUPTED; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b9a9d69dde7e..db52e843002a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) goto done; } - trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen); + trace_ocfs2_journal_init_maxlen(j_journal->j_total_len); *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * -- cgit v1.2.3 From cc80586a57f704f806b9a1b99a21cd07e37dbedc Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:59 -0800 Subject: jbd2: add todo for a fast commit performance optimization Fast commit performance can be optimized if commit thread doesn't wait for ongoing fast commits to complete until the transaction enters T_FLUSH state. Document this optimization. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-11-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index ec516490cb35..b121d7d434c6 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) schedule(); write_lock(&journal->j_state_lock); finish_wait(&journal->j_fc_wait, &wait); + /* + * TODO: by blocking fast commits here, we are increasing + * fsync() latency slightly. Strictly speaking, we don't need + * to block fast commits until the transaction enters T_FLUSH + * state. So an optimization is possible where we block new fast + * commits here and wait for existing ones to complete + * just before we enter T_FLUSH. That way, the existing fast + * commits and this full commit can proceed parallely. + */ } write_unlock(&journal->j_state_lock); -- cgit v1.2.3