diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/jbd2.h | 124 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 228 |
2 files changed, 344 insertions, 8 deletions
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 08f904943ab2..fb3d71ad6eea 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -289,6 +289,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 /* See "journal feature predicate functions" below */ @@ -299,7 +300,8 @@ typedef struct journal_superblock_s JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ - JBD2_FEATURE_INCOMPAT_CSUM_V3) + JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ + JBD2_FEATURE_INCOMPAT_FAST_COMMIT) #ifdef __KERNEL__ @@ -452,8 +454,8 @@ struct jbd2_inode { struct jbd2_revoke_table_s; /** - * struct handle_s - The handle_s type is the concrete type associated with - * handle_t. + * struct jbd2_journal_handle - The jbd2_journal_handle type is the concrete + * type associated with handle_t. * @h_transaction: Which compound transaction is this update a part of? * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. @@ -629,7 +631,9 @@ struct transaction_s struct journal_head *t_shadow_list; /* - * List of inodes whose data we've modified in data=ordered mode. + * List of inodes associated with the transaction; e.g., ext4 uses + * this to track inodes in data=ordered and data=journal mode that + * need special handling on transaction commit; also used by ocfs2. * [j_list_lock] */ struct list_head t_inode_list; @@ -747,6 +751,11 @@ jbd2_time_diff(unsigned long start, unsigned long end) #define JBD2_NR_BATCH 64 +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; + +#define JBD2_FC_REPLAY_STOP 0 +#define JBD2_FC_REPLAY_CONTINUE 1 + /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. @@ -858,6 +867,13 @@ struct journal_s wait_queue_head_t j_wait_reserved; /** + * @j_fc_wait: + * + * Wait queue to wait for completion of async fast commits. + */ + wait_queue_head_t j_fc_wait; + + /** * @j_checkpoint_mutex: * * Semaphore for locking against concurrent checkpoints. @@ -915,6 +931,30 @@ struct journal_s unsigned long j_last; /** + * @j_fc_first: + * + * The block number of the first fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_first; + + /** + * @j_fc_off: + * + * Number of fast commit blocks currently allocated. + * [j_state_lock]. + */ + unsigned long j_fc_off; + + /** + * @j_fc_last: + * + * The block number one beyond the last fast commit block in the journal + * [j_state_lock]. + */ + unsigned long j_fc_last; + + /** * @j_dev: Device where we store the journal. */ struct block_device *j_dev; @@ -1065,6 +1105,12 @@ struct journal_s struct buffer_head **j_wbuf; /** + * @j_fc_wbuf: Array of fast commit bhs for + * jbd2_journal_commit_transaction. + */ + struct buffer_head **j_fc_wbuf; + + /** * @j_wbufsize: * * Size of @j_wbuf array. @@ -1072,6 +1118,13 @@ struct journal_s int j_wbufsize; /** + * @j_fc_wbufsize: + * + * Size of @j_fc_wbuf array. + */ + int j_fc_wbufsize; + + /** * @j_last_sync_writer: * * The pid of the last person to run a synchronous operation @@ -1111,6 +1164,27 @@ struct journal_s void (*j_commit_callback)(journal_t *, transaction_t *); + /** + * @j_submit_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WRITE_DATA flag + * before we start to write out the transaction to the journal. + */ + int (*j_submit_inode_data_buffers) + (struct jbd2_inode *); + + /** + * @j_finish_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WAIT_DATA flag + * after we have written the transaction to the journal + * but before we write out the commit block. + */ + int (*j_finish_inode_data_buffers) + (struct jbd2_inode *); + /* * Journal statistics */ @@ -1170,6 +1244,30 @@ struct journal_s */ struct lockdep_map j_trans_commit_map; #endif + + /** + * @j_fc_cleanup_callback: + * + * Clean-up after fast commit or full commit. JBD2 calls this function + * after every commit operation. + */ + void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + + /* + * @j_fc_replay_callback: + * + * File-system specific function that performs replay of a fast + * commit. JBD2 calls this function for each fast commit block found in + * the journal. This function should return JBD2_FC_REPLAY_CONTINUE + * to indicate that the block was processed correctly and more fast + * commit replay should continue. Return value of JBD2_FC_REPLAY_STOP + * indicates the end of replay (no more blocks remaining). A negative + * return value indicates error. + */ + int (*j_fc_replay_callback)(struct journal_s *journal, + struct buffer_head *bh, + enum passtype pass, int off, + tid_t expected_commit_id); }; #define jbd2_might_wait_for_commit(j) \ @@ -1240,6 +1338,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) +JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) /* * Journal flag definitions @@ -1253,6 +1352,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */ +#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */ /* * Function declarations for the journaling transaction and buffer @@ -1421,6 +1522,10 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle, extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); +extern int jbd2_journal_submit_inode_data_buffers( + struct jbd2_inode *jinode); +extern int jbd2_journal_finish_inode_data_buffers( + struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); @@ -1505,6 +1610,17 @@ void __jbd2_log_wait_for_space(journal_t *journal); extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); +/* Fast commit related APIs */ +int jbd2_fc_init(journal_t *journal, int num_fc_blks); +int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit(journal_t *journal); +int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); +int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); +int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); +int jbd2_fc_release_bufs(journal_t *journal); + /* * is_journal_abort * diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 4c8b99ec8606..b14314fcf732 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -95,6 +95,16 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) +#define show_fc_reason(reason) \ + __print_symbolic(reason, \ + { EXT4_FC_REASON_XATTR, "XATTR"}, \ + { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ + { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ + { EXT4_FC_REASON_MEM, "NO_MEM"}, \ + { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ + { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ + { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ + { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), @@ -1766,9 +1776,9 @@ TRACE_EVENT(ext4_ext_load_extent, ); TRACE_EVENT(ext4_load_inode, - TP_PROTO(struct inode *inode), + TP_PROTO(struct super_block *sb, unsigned long ino), - TP_ARGS(inode), + TP_ARGS(sb, ino), TP_STRUCT__entry( __field( dev_t, dev ) @@ -1776,8 +1786,8 @@ TRACE_EVENT(ext4_load_inode, ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; + __entry->dev = sb->s_dev; + __entry->ino = ino; ), TP_printk("dev %d,%d ino %ld", @@ -2791,6 +2801,216 @@ TRACE_EVENT(ext4_lazy_itable_init, MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ); +TRACE_EVENT(ext4_fc_replay_scan, + TP_PROTO(struct super_block *sb, int error, int off), + + TP_ARGS(sb, error, off), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(int, off) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->error = error; + __entry->off = off; + ), + + TP_printk("FC scan pass on dev %d,%d: error %d, off %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->error, __entry->off) +); + +TRACE_EVENT(ext4_fc_replay, + TP_PROTO(struct super_block *sb, int tag, int ino, int priv1, int priv2), + + TP_ARGS(sb, tag, ino, priv1, priv2), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, tag) + __field(int, ino) + __field(int, priv1) + __field(int, priv2) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->tag = tag; + __entry->ino = ino; + __entry->priv1 = priv1; + __entry->priv2 = priv2; + ), + + TP_printk("FC Replay %d,%d: tag %d, ino %d, data1 %d, data2 %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tag, __entry->ino, __entry->priv1, __entry->priv2) +); + +TRACE_EVENT(ext4_fc_commit_start, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + __field(dev_t, dev) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + ), + + TP_printk("fast_commit started on dev %d,%d", + MAJOR(__entry->dev), MINOR(__entry->dev)) +); + +TRACE_EVENT(ext4_fc_commit_stop, + TP_PROTO(struct super_block *sb, int nblks, int reason), + + TP_ARGS(sb, nblks, reason), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, nblks) + __field(int, reason) + __field(int, num_fc) + __field(int, num_fc_ineligible) + __field(int, nblks_agg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nblks = nblks; + __entry->reason = reason; + __entry->num_fc = EXT4_SB(sb)->s_fc_stats.fc_num_commits; + __entry->num_fc_ineligible = + EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; + __entry->nblks_agg = EXT4_SB(sb)->s_fc_stats.fc_numblks; + ), + + TP_printk("fc on [%d,%d] nblks %d, reason %d, fc = %d, ineligible = %d, agg_nblks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nblks, __entry->reason, __entry->num_fc, + __entry->num_fc_ineligible, __entry->nblks_agg) +); + +#define FC_REASON_NAME_STAT(reason) \ + show_fc_reason(reason), \ + __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] + +TRACE_EVENT(ext4_fc_stats, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(struct ext4_sb_info *, sbi) + __field(int, count) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->sbi = EXT4_SB(sb); + ), + + TP_printk("dev %d:%d fc ineligible reasons:\n" + "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; " + "num_commits:%ld, ineligible: %ld, numblks: %ld", + MAJOR(__entry->dev), MINOR(__entry->dev), + FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), + FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), + FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + __entry->sbi->s_fc_stats.fc_num_commits, + __entry->sbi->s_fc_stats.fc_ineligible_commits, + __entry->sbi->s_fc_stats.fc_numblks) + +); + +#define DEFINE_TRACE_DENTRY_EVENT(__type) \ + TRACE_EVENT(ext4_fc_track_##__type, \ + TP_PROTO(struct inode *inode, struct dentry *dentry, int ret), \ + \ + TP_ARGS(inode, dentry, ret), \ + \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(int, ino) \ + __field(int, error) \ + ), \ + \ + TP_fast_assign( \ + __entry->dev = inode->i_sb->s_dev; \ + __entry->ino = inode->i_ino; \ + __entry->error = ret; \ + ), \ + \ + TP_printk("dev %d:%d, inode %d, error %d, fc_%s", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->ino, __entry->error, \ + #__type) \ + ) + +DEFINE_TRACE_DENTRY_EVENT(create); +DEFINE_TRACE_DENTRY_EVENT(link); +DEFINE_TRACE_DENTRY_EVENT(unlink); + +TRACE_EVENT(ext4_fc_track_inode, + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, ino) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->error = ret; + ), + + TP_printk("dev %d:%d, inode %d, error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, __entry->error) + ); + +TRACE_EVENT(ext4_fc_track_range, + TP_PROTO(struct inode *inode, long start, long end, int ret), + + TP_ARGS(inode, start, end, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, ino) + __field(long, start) + __field(long, end) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->start = start; + __entry->end = end; + __entry->error = ret; + ), + + TP_printk("dev %d:%d, inode %d, error %d, start %ld, end %ld", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, __entry->error, __entry->start, + __entry->end) + ); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ |