From c6153ed23ed6a2bb8a5891382c06134674610f86 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Thu, 16 Apr 2026 19:01:18 +0100 Subject: btrfs: add ioctl GET_CSUMS to read raw checksums from file range Add a new unprivileged BTRFS_IOC_GET_CSUMS ioctl, which can be used to query the on-disk csums for a file range. The ioctl is deliberately per-file rather than exposing raw csum tree lookups, to avoid leaking information to users about files they may not have access to. This is done by userspace passing a struct btrfs_ioctl_get_csums_args to the kernel, which details the offset and length we're interested in, and a buffer for the kernel to write its results into. The kernel writes a struct btrfs_ioctl_get_csums_entry into the buffer, followed by the csums if available. The maximum size of the user buffer is capped to 16MiB. If the extent is an uncompressed, non-NODATASUM extent, the kernel sets the entry type to BTRFS_GET_CSUMS_HAS_CSUMS and follows it with the csums. If it is sparse, preallocated, or beyond the EOF, it sets the type to BTRFS_GET_CSUMS_ZEROED - this is so userspace knows it can use the precomputed hash of the zero sector. Otherwise, it sets the type to BTRFS_GET_CSUMS_NODATASUM, BTRFS_GET_CSUMS_COMPRESSED, BTRFS_GET_CSUMS_ENCRYPTED, or BTRFS_GET_CSUMS_INLINE. For example, a file with a [0, 4K) hole and [4K, 12K) data extent would produce the following output buffer: | [0, 4K) ZEROED | [4K, 12K) HAS_CSUMS | csum data | We do store the csums of compressed extents, but we deliberately don't return them here: they're calculated over the compressed data, not the uncompressed data that's returned to userspace. Similarly for encrypted data, once encryption is supported, in which the csums will be on the ciphertext. The main use case for this is for speeding up mkfs.btrfs --rootdir. 
For the case when the source FS is btrfs and using the same csum algorithm, we can avoid having to recalculate the csums - in my synthetic benchmarks (16GB file on a spinning-rust drive), this resulted in a ~11% speed-up (218s to 196s). When using the --reflink option added in btrfs-progs v6.16.1, we can forgo reading the data entirely, resulting in a ~2200% speed-up on the same test (128s to 6s). # mkdir rootdir # dd if=/dev/urandom of=rootdir/file bs=4096 count=4194304 (without ioctl) # echo 3 > /proc/sys/vm/drop_caches # time mkfs.btrfs --rootdir rootdir testimg ... real 3m37.965s user 0m5.496s sys 0m6.125s # echo 3 > /proc/sys/vm/drop_caches # time mkfs.btrfs --rootdir rootdir --reflink testimg ... real 2m8.342s user 0m5.472s sys 0m1.667s (with ioctl) # echo 3 > /proc/sys/vm/drop_caches # time mkfs.btrfs --rootdir rootdir testimg ... real 3m15.865s user 0m4.258s sys 0m6.261s # echo 3 > /proc/sys/vm/drop_caches # time mkfs.btrfs --rootdir rootdir --reflink testimg ... real 0m5.847s user 0m2.899s sys 0m0.097s Another notable use case is for deduplication, where reading the checksums may serve as a hint instead of reading the whole file data. Reviewed-by: Qu Wenruo Signed-off-by: Mark Harmstone Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 9165154a274d..9b576603b3f1 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -1100,6 +1100,38 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, }; +/* Flags for struct btrfs_ioctl_get_csums_entry::type. 
*/ +#define BTRFS_GET_CSUMS_HAS_CSUMS (1U << 0) +#define BTRFS_GET_CSUMS_ZEROED (1U << 1) +#define BTRFS_GET_CSUMS_NODATASUM (1U << 2) +#define BTRFS_GET_CSUMS_COMPRESSED (1U << 3) +#define BTRFS_GET_CSUMS_ENCRYPTED (1U << 4) +#define BTRFS_GET_CSUMS_INLINE (1U << 5) + +struct btrfs_ioctl_get_csums_entry { + /* File offset of this range. */ + __u64 offset; + /* Length in bytes. */ + __u64 length; + /* One of BTRFS_GET_CSUMS_* types. */ + __u32 type; + /* Padding, must be 0. */ + __u32 reserved; +}; + +struct btrfs_ioctl_get_csums_args { + /* In/out: file offset in bytes. */ + __u64 offset; + /* In/out: range length in bytes. */ + __u64 length; + /* In/out: buffer capacity / bytes written. */ + __u64 buf_size; + /* In: flags, must be 0 for now. */ + __u64 flags; + /* Out: entries of type btrfs_ioctl_get_csums_entry + csum data */ + __u8 buf[]; +}; + /* Flags for IOC_SHUTDOWN, must match XFS_FSOP_GOING_FLAGS_* flags. */ #define BTRFS_SHUTDOWN_FLAGS_DEFAULT 0x0 #define BTRFS_SHUTDOWN_FLAGS_LOGFLUSH 0x1 @@ -1226,6 +1258,8 @@ enum btrfs_err_code { struct btrfs_ioctl_encoded_io_args) #define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \ struct btrfs_ioctl_subvol_wait) +#define BTRFS_IOC_GET_CSUMS _IOWR(BTRFS_IOCTL_MAGIC, 66, \ + struct btrfs_ioctl_get_csums_args) /* Shutdown ioctl should follow XFS's interfaces, thus not using btrfs magic. */ #define BTRFS_IOC_SHUTDOWN _IOR('X', 125, __u32) -- cgit v1.2.3 From c69e110455f49eb625623076b3bbd1be0e7362a9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Apr 2026 18:06:32 +0100 Subject: btrfs: tracepoints: remove double negation in finish ordered extent event There is no need to add a double negation (!!) to the update field because the field has a boolean type. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ec1df8b94517..99ae9c923070 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -670,7 +670,7 @@ TRACE_EVENT(btrfs_finish_ordered_extent, TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, - __entry->len, !!__entry->uptodate) + __entry->len, __entry->uptodate) ); DECLARE_EVENT_CLASS(btrfs__writepage, -- cgit v1.2.3 From 645927cefdb5e024480b5639105fa53a52f986d8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Apr 2026 17:22:59 +0100 Subject: btrfs: tracepoints: remove pointless root field from transaction commit event A transaction commit is global, not per root, and we are currently always emitting a root id field matching the root tree for no good reason at all, causing confusion for no reason at all. So remove the root field. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 99ae9c923070..a8cdc50677a5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -188,17 +188,13 @@ TRACE_EVENT(btrfs_transaction_commit, TP_STRUCT__entry_btrfs( __field( u64, generation ) - __field( u64, root_objectid ) ), TP_fast_assign_btrfs(fs_info, __entry->generation = fs_info->generation; - __entry->root_objectid = BTRFS_ROOT_TREE_OBJECTID; ), - TP_printk_btrfs("root=%llu(%s) gen=%llu", - show_root_type(__entry->root_objectid), - __entry->generation) + TP_printk_btrfs("gen=%llu", __entry->generation) ); DECLARE_EVENT_CLASS(btrfs__inode, -- cgit v1.2.3 From e1ad307c7ab1af0b5c14549b23982464c22996cc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Apr 2026 17:43:19 +0100 Subject: btrfs: tracepoints: pass a transaction handle to transaction commit event The transaction commit tracepoint prints fs_info->generation as if it were the ID of the committed transaction but this does not always match that ID. This is because the trace point is called in the transaction commit path after the transaction is in the TRANS_STATE_COMPLETED state, which means another transaction may have already started (which can happen as soon as the transaction state was set to TRANS_STATE_UNBLOCKED), in which case fs_info->generation was incremented and does not correspond to the committed transaction anymore. So fix this by passing a transaction handle to the trace event instead of fs_info. This will also allow later for the trace event to dump other useful information about the transaction. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 4 ++-- include/trace/events/btrfs.h | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0fd596e2c65b..b98cb7b0630a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2114,7 +2114,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) btrfs_put_transaction(cur_trans); btrfs_put_transaction(cur_trans); - trace_btrfs_transaction_commit(fs_info); + trace_btrfs_transaction_commit(trans); if (current->journal_info == trans) current->journal_info = NULL; @@ -2632,7 +2632,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(fs_info->sb); - trace_btrfs_transaction_commit(fs_info); + trace_btrfs_transaction_commit(trans); btrfs_scrub_continue(fs_info); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index a8cdc50677a5..4cec2f8838f5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -31,6 +31,7 @@ struct btrfs_space_info; struct btrfs_raid_bio; struct raid56_bio_trace_info; struct find_free_extent_ctl; +struct btrfs_trans_handle; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -182,16 +183,16 @@ FLUSH_STATES TRACE_EVENT(btrfs_transaction_commit, - TP_PROTO(const struct btrfs_fs_info *fs_info), + TP_PROTO(const struct btrfs_trans_handle *trans), - TP_ARGS(fs_info), + TP_ARGS(trans), TP_STRUCT__entry_btrfs( __field( u64, generation ) ), - TP_fast_assign_btrfs(fs_info, - __entry->generation = fs_info->generation; + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; ), TP_printk_btrfs("gen=%llu", __entry->generation) -- cgit v1.2.3 From 889ad3845a8c029e93a46251f78d61b277cf5e61 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Apr 2026 18:12:16 +0100 Subject: 
btrfs: tracepoints: add in_fsync field to transaction commit event Include the in_fsync value from the transaction handle so that we can know if a transaction commit was triggered by a fsync call. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4cec2f8838f5..4e077abd6704 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -189,13 +189,16 @@ TRACE_EVENT(btrfs_transaction_commit, TP_STRUCT__entry_btrfs( __field( u64, generation ) + __field( bool, in_fsync ) ), TP_fast_assign_btrfs(trans->fs_info, __entry->generation = trans->transid; + __entry->in_fsync = trans->in_fsync; ), - TP_printk_btrfs("gen=%llu", __entry->generation) + TP_printk_btrfs("gen=%llu in_fsync=%d", __entry->generation, + __entry->in_fsync) ); DECLARE_EVENT_CLASS(btrfs__inode, -- cgit v1.2.3 From b4bbe7d7591e65cf9cc4975ea124fc717df9f128 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 23 Apr 2026 15:17:16 +0100 Subject: btrfs: tracepoints: add trace event for transaction aborts While tracing it's useful to know not just when a transaction is committed but also when one is aborted. So add a trace event for transaction aborts. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 1 + include/trace/events/btrfs.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b98cb7b0630a..277953906b91 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2731,6 +2731,7 @@ void __cold __btrfs_abort_transaction(struct btrfs_trans_handle *trans, WRITE_ONCE(trans->aborted, error); WRITE_ONCE(trans->transaction->aborted, error); + trace_btrfs_transaction_abort(trans); if (first_hit) { btrfs_err(fs_info, "Transaction %llu aborted (error %d)", trans->transid, error); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4e077abd6704..cb9b6188fcdd 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -201,6 +201,28 @@ TRACE_EVENT(btrfs_transaction_commit, __entry->in_fsync) ); +TRACE_EVENT(btrfs_transaction_abort, + + TP_PROTO(const struct btrfs_trans_handle *trans), + + TP_ARGS(trans), + + TP_STRUCT__entry_btrfs( + __field( u64, generation ) + __field( bool, in_fsync ) + __field( int, error ) + ), + + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; + __entry->in_fsync = trans->in_fsync; + __entry->error = trans->aborted; + ), + + TP_printk_btrfs("gen=%llu in_fsync=%d error=%d", __entry->generation, + __entry->in_fsync, __entry->error) +); + DECLARE_EVENT_CLASS(btrfs__inode, TP_PROTO(const struct inode *inode), -- cgit v1.2.3 From 99314d7cc711b4105a9546e63a210ead0d3f6178 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 23 Apr 2026 15:56:52 +0100 Subject: btrfs: tracepoints: add trace event for the start of a new transaction While tracing it's useful to know not just when a transaction is committed or aborted, but also when a new one is started. So add a trace event for transaction starts. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 1 + include/trace/events/btrfs.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 277953906b91..9ff8792d7182 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -394,6 +394,7 @@ loop: cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; cur_trans->aborted = 0; + trace_btrfs_transaction_start(cur_trans); spin_unlock(&fs_info->trans_lock); return 0; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index cb9b6188fcdd..e43528003848 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -32,6 +32,7 @@ struct btrfs_raid_bio; struct raid56_bio_trace_info; struct find_free_extent_ctl; struct btrfs_trans_handle; +struct btrfs_transaction; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -181,6 +182,23 @@ FLUSH_STATES #define TP_printk_btrfs(fmt, args...) \ TP_printk("%pU: " fmt, __entry->fsid, args) +TRACE_EVENT(btrfs_transaction_start, + + TP_PROTO(const struct btrfs_transaction *trans), + + TP_ARGS(trans), + + TP_STRUCT__entry_btrfs( + __field( u64, generation ) + ), + + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; + ), + + TP_printk_btrfs("gen=%llu", __entry->generation) +); + TRACE_EVENT(btrfs_transaction_commit, TP_PROTO(const struct btrfs_trans_handle *trans), -- cgit v1.2.3 From 17819dc282f15d49fac8d7eb94d571e69c22cbdf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 23 Apr 2026 17:05:23 +0100 Subject: btrfs: tracepoints: trace transaction states during commit phase Currently the trace event is fired only when a transaction is fully complete (its state is TRANS_STATE_COMPLETED). 
However, during a transaction commit we go through several states and as soon as the state reaches TRANS_STATE_UNBLOCKED, another transaction can start. Therefore it's useful to track every transaction state change during the commit of a transaction, so that we can see if a new transaction is started before the current one is completed. Add the transaction state to the transaction commit event and call the event every time we change the transaction state during commit. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 8 ++++++-- include/trace/events/btrfs.h | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9ff8792d7182..aef2462b25d8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2321,6 +2321,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) } cur_trans->state = TRANS_STATE_COMMIT_PREP; + trace_btrfs_transaction_commit(trans); wake_up(&fs_info->transaction_blocked_wait); btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); @@ -2359,6 +2360,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) } cur_trans->state = TRANS_STATE_COMMIT_START; + trace_btrfs_transaction_commit(trans); wake_up(&fs_info->transaction_blocked_wait); spin_unlock(&fs_info->trans_lock); @@ -2414,6 +2416,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) spin_lock(&fs_info->trans_lock); add_pending_snapshot(trans); cur_trans->state = TRANS_STATE_COMMIT_DOING; + trace_btrfs_transaction_commit(trans); spin_unlock(&fs_info->trans_lock); /* @@ -2562,6 +2565,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) spin_lock(&fs_info->trans_lock); cur_trans->state = TRANS_STATE_UNBLOCKED; + trace_btrfs_transaction_commit(trans); fs_info->running_transaction = NULL; spin_unlock(&fs_info->trans_lock); 
mutex_unlock(&fs_info->reloc_mutex); @@ -2604,6 +2608,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * which can change it. */ cur_trans->state = TRANS_STATE_SUPER_COMMITTED; + trace_btrfs_transaction_commit(trans); wake_up(&cur_trans->commit_wait); btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); @@ -2620,6 +2625,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * which can change it. */ cur_trans->state = TRANS_STATE_COMPLETED; + trace_btrfs_transaction_commit(trans); wake_up(&cur_trans->commit_wait); btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED); @@ -2633,8 +2639,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(fs_info->sb); - trace_btrfs_transaction_commit(trans); - btrfs_scrub_continue(fs_info); if (current->journal_info == trans) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index e43528003848..801f4793e002 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -105,6 +105,15 @@ struct btrfs_transaction; EM( COMMIT_TRANS, "COMMIT_TRANS") \ EMe(RESET_ZONES, "RESET_ZONES") +#define TRANSACTION_STATES \ + EM( TRANS_STATE_RUNNING, "TRANS_STATE_RUNNING") \ + EM( TRANS_STATE_COMMIT_PREP, "TRANS_STATE_COMMIT_PREP") \ + EM( TRANS_STATE_COMMIT_START, "TRANS_STATE_COMMIT_START") \ + EM( TRANS_STATE_COMMIT_DOING, "TRANS_STATE_COMMIT_DOING") \ + EM( TRANS_STATE_UNBLOCKED, "TRANS_STATE_UNBLOCKED") \ + EM( TRANS_STATE_SUPER_COMMITTED, "TRANS_STATE_SUPER_COMMITTED") \ + EMe(TRANS_STATE_COMPLETED, "TRANS_STATE_COMPLETED") + /* * First define the enums in the above macros to be exported to userspace via * TRACE_DEFINE_ENUM(). 
@@ -120,6 +129,7 @@ FI_TYPES QGROUP_RSV_TYPES IO_TREE_OWNER FLUSH_STATES +TRANSACTION_STATES /* * Now redefine the EM and EMe macros to map the enums to the strings that will @@ -208,15 +218,18 @@ TRACE_EVENT(btrfs_transaction_commit, TP_STRUCT__entry_btrfs( __field( u64, generation ) __field( bool, in_fsync ) + __field( int, state ) ), TP_fast_assign_btrfs(trans->fs_info, __entry->generation = trans->transid; __entry->in_fsync = trans->in_fsync; + __entry->state = trans->transaction->state; ), - TP_printk_btrfs("gen=%llu in_fsync=%d", __entry->generation, - __entry->in_fsync) + TP_printk_btrfs("gen=%llu in_fsync=%d state=%d(%s)", __entry->generation, + __entry->in_fsync, __entry->state, + __print_symbolic(__entry->state, TRANSACTION_STATES)) ); TRACE_EVENT(btrfs_transaction_abort, -- cgit v1.2.3 From 658d72fe491380582ddfa7e536142d829a7b3688 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 28 Apr 2026 15:32:15 +0100 Subject: btrfs: tracepoints: add trace event for when fsync finishes Currently we only have a trace event for when a fsync operation starts, but this alone is not very helpful. Add a trace event for when fsync finishes, which reports its return value, so that using tracing we can see which other trace events happened in between (several will be added soon for inode logging steps) and even measure execution time. So rename the existing trace event btrfs_sync_file to btrfs_sync_file_enter and add the trace event btrfs_sync_file_exit. The naming is similar to what ext4 does (ext4_sync_file_enter and ext4_sync_file_exit) and with similar information reported. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 +++- include/trace/events/btrfs.h | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f77ccef837b9..d786b9666755 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1564,7 +1564,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) btrfs_assert_inode_locked(inode); } - trace_btrfs_sync_file(file, datasync); + trace_btrfs_sync_file_enter(file, datasync); btrfs_init_log_ctx(&ctx, inode); @@ -1810,6 +1810,8 @@ out: err = file_check_and_advance_wb_err(file); if (!ret) ret = err; + trace_btrfs_sync_file_exit(file, ret); + return ret; out_release_extents: diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 801f4793e002..1b19364eabff 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -808,7 +808,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, __entry->end, __entry->uptodate) ); -TRACE_EVENT(btrfs_sync_file, +TRACE_EVENT(btrfs_sync_file_enter, TP_PROTO(const struct file *file, int datasync), @@ -840,6 +840,32 @@ TRACE_EVENT(btrfs_sync_file, __entry->datasync) ); +TRACE_EVENT(btrfs_sync_file_exit, + + TP_PROTO(const struct file *file, int ret), + + TP_ARGS(file, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, ino ) + __field( int, ret ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + struct btrfs_inode *inode = BTRFS_I(file_inode(file)); + + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu ret=%d", + show_root_type(__entry->root_objectid), + __entry->ino, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 
d32609208782127d99917fb7aeee26738ad89e3f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 28 Apr 2026 16:52:09 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_log_inode_parent() btrfs_log_inode_parent() is one of the most important steps called during a fsync operation as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 31 +++++++++++----- include/trace/events/btrfs.h | 85 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9123adafa0d1..f03bc6588210 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7541,27 +7541,37 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, int ret = 0; bool log_dentries; - if (btrfs_test_opt(fs_info, NOTREELOG)) - return BTRFS_LOG_FORCE_COMMIT; + trace_btrfs_log_inode_parent_enter(trans, inode); - if (btrfs_root_refs(&root->root_item) == 0) - return BTRFS_LOG_FORCE_COMMIT; + if (btrfs_test_opt(fs_info, NOTREELOG)) { + ret = BTRFS_LOG_FORCE_COMMIT; + goto out; + } + + if (btrfs_root_refs(&root->root_item) == 0) { + ret = BTRFS_LOG_FORCE_COMMIT; + goto out; + } /* * If we're logging an inode from a subvolume created in the current * transaction we must force a commit since the root is not persisted. */ - if (btrfs_root_generation(&root->root_item) == trans->transid) - return BTRFS_LOG_FORCE_COMMIT; + if (btrfs_root_generation(&root->root_item) == trans->transid) { + ret = BTRFS_LOG_FORCE_COMMIT; + goto out; + } /* Skip already logged inodes and without new extents. 
*/ if (btrfs_inode_in_log(inode, trans->transid) && - list_empty(&ctx->ordered_extents)) - return BTRFS_NO_LOG_SYNC; + list_empty(&ctx->ordered_extents)) { + ret = BTRFS_NO_LOG_SYNC; + goto out; + } ret = start_log_trans(trans, root, ctx); if (ret) - return ret; + goto out; ret = btrfs_log_inode(trans, inode, inode_only, ctx); if (ret) @@ -7649,6 +7659,9 @@ end_trans: btrfs_remove_log_ctx(root, ctx); btrfs_end_log_trans(root); +out: + trace_btrfs_log_inode_parent_exit(trans, inode, ret); + return ret; } diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1b19364eabff..cd127c79a535 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -34,6 +34,16 @@ struct find_free_extent_ctl; struct btrfs_trans_handle; struct btrfs_transaction; +#define show_inode_type(mode) \ + __print_symbolic((mode) & S_IFMT, \ + { S_IFDIR, "DIR" }, \ + { S_IFREG, "REG" }, \ + { S_IFLNK, "LNK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFSOCK, "SOCK" }) + #define show_ref_type(type) \ __print_symbolic(type, \ { BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \ @@ -866,6 +876,81 @@ TRACE_EVENT(btrfs_sync_file_exit, __entry->ino, __entry->ret) ); +TRACE_EVENT(btrfs_log_inode_parent_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, transid ) + __field( u64, generation ) + __field( u64, logged_trans ) + __field( u64, last_unlink_trans ) + __field( int, last_sub_trans ) + __field( int, inode_last_log_commit ) + __field( int, root_last_log_commit ) + ), + + TP_fast_assign( + struct btrfs_root *root = inode->root; + + TP_fast_assign_fsid(root->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->transid = trans->transid; + 
__entry->generation = inode->generation; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_unlink_trans = inode->last_unlink_trans; + __entry->last_sub_trans = inode->last_sub_trans; + __entry->inode_last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->root_last_log_commit = btrfs_get_root_last_log_commit(root); + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s transid=%llu gen=%llu" + " logged_trans=%llu last_unlink_trans=%llu last_sub_trans=%d" + " inode_last_log_commit=%d root_last_log_commit=%d", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->transid, + __entry->generation, __entry->logged_trans, + __entry->last_unlink_trans, __entry->last_sub_trans, + __entry->inode_last_log_commit, __entry->root_last_log_commit) +); + +TRACE_EVENT(btrfs_log_inode_parent_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From e2f67524f4855c5fd8b7d853f283f35222ae875f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 30 Apr 2026 16:05:56 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_log_inode() btrfs_log_inode() is one of the most important steps called during a fsync, as well as during rename and link operations on inodes that were previously 
logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/super.c | 1 + fs/btrfs/tree-log.c | 16 ++++-- include/trace/events/btrfs.h | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fb15decb0861..9de67276a8ed 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -60,6 +60,7 @@ #include "verity.h" #include "super.h" #include "extent-tree.h" +#include "tree-log.h" #define CREATE_TRACE_POINTS #include diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 70795cd76d6f..22066635f75f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6884,7 +6884,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_path *path; - struct btrfs_path *dst_path; + struct btrfs_path *dst_path = NULL; struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = inode->root->log_root; @@ -6900,13 +6900,17 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, LIST_HEAD(delayed_ins_list); LIST_HEAD(delayed_del_list); + trace_btrfs_log_inode_enter(trans, inode, ctx, log_mode); + path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } dst_path = btrfs_alloc_path(); if (!dst_path) { - btrfs_free_path(path); - return -ENOMEM; + ret = -ENOMEM; + goto out; } min_key.objectid = ino; @@ -7221,6 +7225,8 @@ out: &delayed_del_list); } + trace_btrfs_log_inode_exit(trans, inode, ret); + return ret; } diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index cd127c79a535..9f195d0d5378 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -33,6 +33,7 @@ struct raid56_bio_trace_info; struct find_free_extent_ctl; struct btrfs_trans_handle; struct btrfs_transaction; +struct 
btrfs_log_ctx; #define show_inode_type(mode) \ __print_symbolic((mode) & S_IFMT, \ @@ -124,6 +125,9 @@ struct btrfs_transaction; EM( TRANS_STATE_SUPER_COMMITTED, "TRANS_STATE_SUPER_COMMITTED") \ EMe(TRANS_STATE_COMPLETED, "TRANS_STATE_COMPLETED") +#define LOG_MODES \ + EM( LOG_INODE_ALL, "LOG_INODE_ALL") \ + EMe(LOG_INODE_EXISTS, "LOG_INODE_EXISTS") /* * First define the enums in the above macros to be exported to userspace via * TRACE_DEFINE_ENUM(). @@ -140,6 +144,7 @@ QGROUP_RSV_TYPES IO_TREE_OWNER FLUSH_STATES TRANSACTION_STATES +LOG_MODES /* * Now redefine the EM and EMe macros to map the enums to the strings that will @@ -951,6 +956,117 @@ TRACE_EVENT(btrfs_log_inode_parent_exit, __entry->transid, __entry->ret) ); +TRACE_EVENT(btrfs_log_inode_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode, + const struct btrfs_log_ctx *ctx, int log_mode), + + TP_ARGS(trans, inode, ctx, log_mode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, transid ) + __field( u64, generation ) + __field( u64, logged_trans ) + __field( u64, last_unlink_trans ) + __field( u64, last_reflink_trans ) + __field( int, last_sub_trans ) + __field( int, last_log_commit ) + __field( bool, logging_new_name ) + __field( bool, logging_new_delayed_dentries ) + __field( bool, is_conflict_inode ) + __field( bool, full_sync ) + __field( bool, copy_everything ) + __field( bool, no_xattrs ) + __field( int, log_mode ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->transid = trans->transid; + __entry->generation = inode->generation; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_unlink_trans = inode->last_unlink_trans; + __entry->last_reflink_trans = inode->last_reflink_trans; + 
__entry->last_sub_trans = inode->last_sub_trans; + __entry->last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->logging_new_name = ctx->logging_new_name; + __entry->logging_new_delayed_dentries = ctx->logging_new_delayed_dentries; + __entry->is_conflict_inode = ctx->logging_conflict_inodes; + __entry->full_sync = + test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); + __entry->copy_everything = + test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + __entry->no_xattrs = + test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags); + __entry->log_mode = log_mode; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s transid=%llu gen=%llu" + " logged_trans=%llu last_unlink_trans=%llu" + " last_reflink_trans=%llu last_sub_trans=%d last_log_commit=%d" + " logging_new_name=%d logging_new_delayed_dentries=%d" + " is_conflict_inode=%d full_sync=%d copy_everything=%d" + " no_xattrs=%d log_mode=%d(%s)", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->transid, + __entry->generation, __entry->logged_trans, + __entry->last_unlink_trans, __entry->last_reflink_trans, + __entry->last_sub_trans, __entry->last_log_commit, + __entry->logging_new_name, __entry->logging_new_delayed_dentries, + __entry->is_conflict_inode, __entry->full_sync, + __entry->copy_everything, __entry->no_xattrs, __entry->log_mode, + __print_symbolic(__entry->log_mode, LOG_MODES)) +); + +TRACE_EVENT(btrfs_log_inode_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( u64, logged_trans ) + __field( u64, last_reflink_trans ) + __field( int, last_sub_trans ) + __field( int, last_log_commit ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid =
btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_reflink_trans = inode->last_reflink_trans; + __entry->last_sub_trans = inode->last_sub_trans; + __entry->last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu logged_trans=%llu" + " last_reflink_trans=%llu last_sub_trans=%d" + " last_log_commit=%d ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->logged_trans, + __entry->last_reflink_trans, __entry->last_sub_trans, + __entry->last_log_commit, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 0f24ea456ae16cf489080f7cdb565f55792e72b7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 30 Apr 2026 17:10:01 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_log_all_parents() btrfs_log_all_parents() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 29 ++++++++++++++++-------- include/trace/events/btrfs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 22066635f75f..90f5d1c830e4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7240,9 +7240,13 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, struct btrfs_root *root = inode->root; const u64 ino = btrfs_ino(inode); + trace_btrfs_log_all_parents_enter(trans, inode); + path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } path->skip_locking = true; path->search_commit_root = true; @@ -7251,7 +7255,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - return ret; + goto out; while (true) { struct extent_buffer *leaf = path->nodes[0]; @@ -7263,9 +7267,11 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - return ret; - if (ret > 0) + goto out; + if (ret > 0) { + ret = 0; break; + } continue; } @@ -7318,8 +7324,10 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, * at both parents and the old parent B would still * exist. 
*/ - if (IS_ERR(dir_inode)) - return PTR_ERR(dir_inode); + if (IS_ERR(dir_inode)) { + ret = PTR_ERR(dir_inode); + goto out; + } if (!need_log_inode(trans, dir_inode)) { btrfs_add_delayed_iput(dir_inode); @@ -7332,11 +7340,14 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ret = log_new_dir_dentries(trans, dir_inode, ctx); btrfs_add_delayed_iput(dir_inode); if (ret) - return ret; + goto out; } path->slots[0]++; } - return 0; +out: + trace_btrfs_log_all_parents_exit(trans, inode, ret); + + return ret; } static int log_new_ancestors(struct btrfs_trans_handle *trans, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 9f195d0d5378..2c145240d809 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1067,6 +1067,59 @@ TRACE_EVENT(btrfs_log_inode_exit, __entry->last_log_commit, __entry->ret) ); +TRACE_EVENT(btrfs_log_all_parents_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid) +); + +TRACE_EVENT(btrfs_log_all_parents_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret 
= ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From e61574f1a961760df2dfd164a967797ce3346249 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 5 May 2026 15:31:16 +0100 Subject: btrfs: tracepoints: add trace event for log_all_new_ancestors() log_all_new_ancestors() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 34 ++++++++++++++++++--------- include/trace/events/btrfs.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 90f5d1c830e4..f34dc9771dab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7462,16 +7462,22 @@ static int log_all_new_ancestors(struct btrfs_trans_handle *trans, struct btrfs_key search_key; int ret; + trace_btrfs_log_all_new_ancestors_enter(trans, inode); + /* * For a single hard link case, go through a fast path that does not * need to iterate the fs/subvolume tree. 
*/ - if (inode->vfs_inode.i_nlink < 2) - return log_new_ancestors_fast(trans, inode, parent, ctx); + if (inode->vfs_inode.i_nlink < 2) { + ret = log_new_ancestors_fast(trans, inode, parent, ctx); + goto out; + } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } search_key.objectid = ino; search_key.type = BTRFS_INODE_REF_KEY; @@ -7479,7 +7485,7 @@ static int log_all_new_ancestors(struct btrfs_trans_handle *trans, again: ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) - return ret; + goto out; if (ret == 0) path->slots[0]++; @@ -7491,9 +7497,11 @@ again: if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - return ret; - if (ret > 0) + goto out; + if (ret > 0) { + ret = 0; break; + } continue; } @@ -7509,8 +7517,10 @@ again: * this loop, etc). So just return some error to fallback to * a transaction commit. */ - if (found_key.type == BTRFS_INODE_EXTREF_KEY) - return -EMLINK; + if (found_key.type == BTRFS_INODE_EXTREF_KEY) { + ret = -EMLINK; + goto out; + } /* * Logging ancestors needs to do more searches on the fs/subvol @@ -7522,11 +7532,13 @@ again: ret = log_new_ancestors(trans, root, path, ctx); if (ret) - return ret; + goto out; btrfs_release_path(path); goto again; } - return 0; +out: + trace_btrfs_log_all_new_ancestors_exit(trans, inode, ret); + return ret; } /* diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2c145240d809..d16de652a815 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1120,6 +1120,61 @@ TRACE_EVENT(btrfs_log_all_parents_exit, __entry->transid, __entry->ret) ); +TRACE_EVENT(btrfs_log_all_new_ancestors_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( unsigned int, nlink ) + 
), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->nlink = inode->vfs_inode.i_nlink; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu nlink=%u", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->nlink) +); + +TRACE_EVENT(btrfs_log_all_new_ancestors_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From bb10bfba7526f6c1fc27dc60bd4f53b213dc3102 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 5 May 2026 15:59:37 +0100 Subject: btrfs: tracepoints: add trace event for log_new_dir_dentries() log_new_dir_dentries() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 10 +++++++-- include/trace/events/btrfs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f34dc9771dab..44c7d250b810 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5902,9 +5902,13 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, struct btrfs_inode *curr_inode = start_inode; int ret = 0; + trace_btrfs_log_new_dir_dentries_enter(trans, start_inode); + path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } /* Pairs with btrfs_add_delayed_iput below. */ ihold(&curr_inode->vfs_inode); @@ -6023,6 +6027,8 @@ out: kfree(dir_elem); } + trace_btrfs_log_new_dir_dentries_exit(trans, start_inode, ret); + return ret; } diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d16de652a815..c13c1ce0b344 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1175,6 +1175,59 @@ TRACE_EVENT(btrfs_log_all_new_ancestors_exit, __entry->transid, __entry->ret) ); +TRACE_EVENT(btrfs_log_new_dir_dentries_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid) +); + +TRACE_EVENT(btrfs_log_new_dir_dentries_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + 
+ TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From aa0487b003a814beedd1af32b79478d834d15b2d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 7 May 2026 11:17:55 +0100 Subject: btrfs: tracepoints: add trace event for add_conflicting_inode() add_conflicting_inode() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 42 ++++++++++++++++++---------- include/trace/events/btrfs.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 44c7d250b810..63553d72ff1a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6121,6 +6121,9 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, { struct btrfs_ino_list *ino_elem; struct btrfs_inode *inode; + int ret = 0; + + trace_btrfs_add_conflicting_inode_enter(trans, ctx, ino, parent); /* * It's rare to have a lot of conflicting inodes, in practice it is not @@ -6129,8 +6132,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction * commits. 
*/ - if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) - return BTRFS_LOG_FORCE_COMMIT; + if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) { + ret = BTRFS_LOG_FORCE_COMMIT; + goto out; + } inode = btrfs_iget_logging(ino, root); /* @@ -6154,26 +6159,27 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * some inode from it to some other directory). */ if (IS_ERR(inode)) { - int ret = PTR_ERR(inode); - + ret = PTR_ERR(inode); if (ret != -ENOENT) - return ret; + goto out; ret = conflicting_inode_is_dir(root, ino, path); /* Not a directory or we got an error. */ if (ret <= 0) - return ret; + goto out; /* Conflicting inode is a directory, so we'll log its parent. */ ino_elem = kmalloc_obj(*ino_elem, GFP_NOFS); - if (!ino_elem) - return -ENOMEM; + if (!ino_elem) { + ret = -ENOMEM; + goto out; + } ino_elem->ino = ino; ino_elem->parent = parent; list_add_tail(&ino_elem->list, &ctx->conflict_inodes); ctx->num_conflict_inodes++; - - return 0; + ret = 0; + goto out; } /* @@ -6213,25 +6219,31 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, */ if (!need_log_inode(trans, inode)) { btrfs_add_delayed_iput(inode); - return 0; + goto out; } if (!can_log_conflicting_inode(trans, inode)) { btrfs_add_delayed_iput(inode); - return BTRFS_LOG_FORCE_COMMIT; + ret = BTRFS_LOG_FORCE_COMMIT; + goto out; } btrfs_add_delayed_iput(inode); ino_elem = kmalloc_obj(*ino_elem, GFP_NOFS); - if (!ino_elem) - return -ENOMEM; + if (!ino_elem) { + ret = -ENOMEM; + goto out; + } ino_elem->ino = ino; ino_elem->parent = parent; list_add_tail(&ino_elem->list, &ctx->conflict_inodes); ctx->num_conflict_inodes++; - return 0; +out: + trace_btrfs_add_conflicting_inode_exit(trans, ctx, ino, parent, ret); + + return ret; } static int log_conflicting_inodes(struct btrfs_trans_handle *trans, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index c13c1ce0b344..ad18b09fb3cd 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h 
@@ -1228,6 +1228,72 @@ TRACE_EVENT(btrfs_log_new_dir_dentries_exit, __entry->transid, __entry->ret) ); +TRACE_EVENT(btrfs_add_conflicting_inode_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + u64 ino, u64 parent), + + TP_ARGS(trans, ctx, ino, parent), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( u64, conflict_ino ) + __field( u64, conflict_ino_parent ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->conflict_ino = ino; + __entry->conflict_ino_parent = parent; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu conflict_ino=%llu" + " conflict_ino_parent=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->conflict_ino, + __entry->conflict_ino_parent) +); + +TRACE_EVENT(btrfs_add_conflicting_inode_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + u64 ino, u64 parent, int ret), + + TP_ARGS(trans, ctx, ino, parent, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( u64, conflict_ino ) + __field( u64, conflict_ino_parent ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->conflict_ino = ino; + __entry->conflict_ino_parent = parent; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu conflict_ino=%llu" + " conflict_ino_parent=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->conflict_ino, + __entry->conflict_ino_parent, 
__entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From aba309b893ab6ca02761119abeb565f8e71adc8b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 7 May 2026 13:03:13 +0100 Subject: btrfs: tracepoints: add trace event for log_conflicting_inodes() log_conflicting_inodes() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 9 ++++++++ include/trace/events/btrfs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 63553d72ff1a..f91924372494 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6261,7 +6261,15 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (ctx->logging_conflict_inodes) return 0; + /* + * Avoid any work if no conflicting inodes and emitting the trace event + * which only adds noise and it's useless if there are no inodes. 
+ */ + if (list_empty(&ctx->conflict_inodes)) + return 0; + ctx->logging_conflict_inodes = true; + trace_btrfs_log_conflicting_inodes_enter(trans, ctx); /* * New conflicting inodes may be found and added to the list while we @@ -6355,6 +6363,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, ctx->logging_conflict_inodes = false; if (ret) free_conflicting_inodes(ctx); + trace_btrfs_log_conflicting_inodes_exit(trans, ctx, ret); return ret; } diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ad18b09fb3cd..2eff817026a4 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1294,6 +1294,59 @@ TRACE_EVENT(btrfs_add_conflicting_inode_exit, __entry->conflict_ino_parent, __entry->ret) ); +TRACE_EVENT(btrfs_log_conflicting_inodes_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx), + + TP_ARGS(trans, ctx), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino) +); + +TRACE_EVENT(btrfs_log_conflicting_inodes_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + int ret), + + TP_ARGS(trans, ctx, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu 
ctx_ino=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 5fe47577ad40b6b88fb189cdacdf102ee66ed5bc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 8 May 2026 17:09:48 +0100 Subject: btrfs: tracepoints: add trace event for log_new_delayed_dentries() log_new_delayed_dentries() is an important step called during a fsync, as well as during rename and link operations on inodes that were previously logged. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 10 +++++++++ include/trace/events/btrfs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 12421ae640be..2ed15485fe5a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6844,6 +6844,15 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, lockdep_assert_not_held(&inode->log_mutex); ASSERT(!ctx->logging_new_delayed_dentries); + + /* + * Return early if empty list, avoid emitting redundant trace events + * that generate noise only. 
+ */ + if (list_empty(delayed_ins_list)) + return 0; + + trace_btrfs_log_new_delayed_dentries_enter(trans, inode); ctx->logging_new_delayed_dentries = true; list_for_each_entry(item, delayed_ins_list, log_list) { @@ -6886,6 +6895,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, ctx->log_new_dentries = orig_log_new_dentries; ctx->logging_new_delayed_dentries = false; + trace_btrfs_log_new_delayed_dentries_exit(trans, inode, ret); return ret; } diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 2eff817026a4..dd7731b484aa 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1347,6 +1347,59 @@ TRACE_EVENT(btrfs_log_conflicting_inodes_exit, __entry->ctx_ino, __entry->ret) ); +TRACE_EVENT(btrfs_log_new_delayed_dentries_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino) +); + +TRACE_EVENT(btrfs_log_new_delayed_dentries_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu ret=%d", + 
show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From bc620c48b51ef18ce9331ec0cc4301d3dc059ff3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 May 2026 15:51:13 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_record_unlink_dir() btrfs_record_unlink_dir() is an important operation that affects inode logging and is called during unlink and rename operations. Add a trace event for it to help debug issues. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 ++ include/trace/events/btrfs.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2ed15485fe5a..627705faa851 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7938,6 +7938,8 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, bool for_rename) { + trace_btrfs_record_unlink_dir(trans, dir, inode, for_rename); + /* * when we're logging a file, if it hasn't been renamed * or unlinked, and its inode is fully committed on disk, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index dd7731b484aa..1571c445abe6 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1400,6 +1400,37 @@ TRACE_EVENT(btrfs_log_new_delayed_dentries_exit, __entry->ino, __entry->ret) ); +TRACE_EVENT(btrfs_record_unlink_dir, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir, + const struct btrfs_inode *inode, + bool for_rename), + + TP_ARGS(trans, dir, inode, for_rename), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( u64, dir ) + __field( bool, for_rename ) + ), + + TP_fast_assign( + 
TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->dir = btrfs_ino(dir); + __entry->for_rename = for_rename; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu dir=%llu for_rename=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->dir, __entry->for_rename) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From e1443032400a7e9d205c75730e8035e2db779231 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 May 2026 16:05:13 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_record_snapshot_destroy() btrfs_record_snapshot_destroy() is an important operation that affects inode logging and is called during subvolume/snapshot deletion as well as during rmdir. Add a trace event for it to help debug issues. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 ++ include/trace/events/btrfs.h | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 627705faa851..7f014e6be4b7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -8002,6 +8002,8 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, struct btrfs_inode *dir) { + trace_btrfs_record_snapshot_destroy(trans, dir); + mutex_lock(&dir->log_mutex); dir->last_unlink_trans = trans->transid; mutex_unlock(&dir->log_mutex); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1571c445abe6..a14a8d32a6f1 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1431,6 +1431,31 @@ TRACE_EVENT(btrfs_record_unlink_dir, __entry->ino, __entry->dir, __entry->for_rename) ); 
+TRACE_EVENT(btrfs_record_snapshot_destroy, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir), + + TP_ARGS(trans, dir), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, dir ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(dir->root); + __entry->transid = trans->transid; + __entry->dir = btrfs_ino(dir); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu dir=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->dir) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 4f065ebdf8c5ad477dc7be57c76e0fcb50670a6c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 May 2026 16:13:18 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_record_new_subvolume() btrfs_record_new_subvolume() is an important operation that affects inode logging and is called during subvolume creation. Add a trace event for it to help debug issues. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 ++ include/trace/events/btrfs.h | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7f014e6be4b7..3f3c87f580b3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -8024,6 +8024,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) { + trace_btrfs_record_new_subvolume(trans, dir); + mutex_lock(&dir->log_mutex); dir->last_unlink_trans = trans->transid; mutex_unlock(&dir->log_mutex); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index a14a8d32a6f1..47e6f382e22a 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1456,6 +1456,31 @@ TRACE_EVENT(btrfs_record_snapshot_destroy, __entry->dir) ); +TRACE_EVENT(btrfs_record_new_subvolume, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir), + + TP_ARGS(trans, dir), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, dir ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(dir->root); + __entry->transid = trans->transid; + __entry->dir = btrfs_ino(dir); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu dir=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->dir) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 8c2e738c61f17c3650fd56e0d52518f2904e7b5e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 May 2026 16:38:25 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_log_new_name() btrfs_log_new_name() is an important function that affects inode logging and is called during link and rename 
operations. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 7 +++-- include/trace/events/btrfs.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3f3c87f580b3..49ab92bc3aa2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -8058,6 +8058,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + trace_btrfs_log_new_name_enter(trans, inode, old_dir, old_dir_index); + /* The inode has a new name (ref/extref), so make sure we log it. */ set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); @@ -8080,7 +8082,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, goto out; } else if (ret == 0) { if (!old_dir) - return; + goto out; /* * If the inode was not logged and we are doing a rename (old_dir is not * NULL), check if old_dir was logged - if it was not we can return and @@ -8090,7 +8092,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, if (ret < 0) goto out; else if (ret == 0) - return; + goto out; } ret = 0; @@ -8189,6 +8191,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx); ASSERT(list_empty(&ctx.conflict_inodes)); out: + trace_btrfs_log_new_name_exit(trans, inode, old_dir, ret); /* * If an error happened mark the log for a full commit because it's not * consistent and up to date or we couldn't find out if one of the diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 47e6f382e22a..ad9ae2489782 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1481,6 +1481,72 @@ TRACE_EVENT(btrfs_record_new_subvolume, __entry->dir) ); +TRACE_EVENT(btrfs_log_new_name_enter, + + TP_PROTO(const struct 
btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + const struct btrfs_inode *old_dir, + u64 old_dir_index), + + TP_ARGS(trans, inode, old_dir, old_dir_index), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, old_dir_ino ) + __field( u64, old_dir_index ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->old_dir_ino = old_dir ? btrfs_ino(old_dir) : 0; + __entry->old_dir_index = old_dir_index; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu type=%s" + " old_dir=%llu old_dir_index=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, show_inode_type(__entry->mode), + __entry->old_dir_ino, __entry->old_dir_index) +); + +TRACE_EVENT(btrfs_log_new_name_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + const struct btrfs_inode *old_dir, + int ret), + + TP_ARGS(trans, inode, old_dir, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( u64, old_dir_ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->old_dir_ino = old_dir ? 
btrfs_ino(old_dir) : 0; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu old_dir=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->old_dir_ino, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From 375336c17efa3d1ac62c4ecfde7c107ef3712f72 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 May 2026 16:11:43 +0100 Subject: btrfs: tracepoints: add trace event for btrfs_sync_log() btrfs_sync_log() is one of the main functions called during a fsync. Add trace events for when entering and exiting that function. Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 15 ++++++++ include/trace/events/btrfs.h | 85 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) (limited to 'include') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 49ab92bc3aa2..875e4ddc68ea 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3322,8 +3322,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, u64 log_root_level; mutex_lock(&root->log_mutex); + trace_btrfs_sync_log_enter(trans, root, ctx); log_transid = ctx->log_transid; if (root->log_transid_committed >= log_transid) { + trace_btrfs_sync_log_exit(trans, root, ctx, ctx->log_ret); mutex_unlock(&root->log_mutex); return ctx->log_ret; } @@ -3331,6 +3333,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, index1 = log_transid % 2; if (atomic_read(&root->log_commit[index1])) { wait_log_commit(root, log_transid); + trace_btrfs_sync_log_exit(trans, root, ctx, ctx->log_ret); mutex_unlock(&root->log_mutex); return ctx->log_ret; } @@ -3359,6 +3362,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* bail out if we need to do a full commit */ if (btrfs_need_log_full_commit(trans)) { ret = BTRFS_LOG_FORCE_COMMIT; + trace_btrfs_sync_log_exit(trans, root, ctx, ret); 
mutex_unlock(&root->log_mutex); goto out; } @@ -3385,6 +3389,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (ret == -EAGAIN && btrfs_is_zoned(fs_info)) ret = 0; if (ret) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); @@ -3422,6 +3427,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); mutex_unlock(&fs_info->tree_root->log_mutex); blk_finish_plug(&plug); goto out; @@ -3445,6 +3451,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ ret = update_log_root(trans, log, &new_root_item); if (ret) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); list_del_init(&root_log_ctx.list); blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); @@ -3462,6 +3469,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, list_del_init(&root_log_ctx.list); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; + trace_btrfs_sync_log_exit(trans, root, ctx, ret); goto out; } @@ -3473,6 +3481,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); if (!ret) ret = root_log_ctx.log_ret; + trace_btrfs_sync_log_exit(trans, root, ctx, ret); goto out; } ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid, @@ -3494,6 +3503,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); ret = BTRFS_LOG_FORCE_COMMIT; + trace_btrfs_sync_log_exit(trans, root, ctx, ret); goto out_wake_log_root; } @@ -3507,11 +3517,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * deadlock. Bail out to the full commit instead. 
*/ if (ret == -EAGAIN && btrfs_is_zoned(fs_info)) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); btrfs_set_log_full_commit(trans); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } else if (ret) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; @@ -3521,6 +3533,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_wait_tree_log_extents(log_root_tree, EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); if (ret) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; @@ -3557,6 +3570,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ if (unlikely(BTRFS_FS_ERROR(fs_info))) { ret = -EIO; + trace_btrfs_sync_log_exit(trans, root, ctx, ret); btrfs_set_log_full_commit(trans); btrfs_abort_transaction(trans, ret); mutex_unlock(&fs_info->tree_log_mutex); @@ -3568,6 +3582,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = write_all_supers(trans); mutex_unlock(&fs_info->tree_log_mutex); if (unlikely(ret)) { + trace_btrfs_sync_log_exit(trans, root, ctx, ret); btrfs_set_log_full_commit(trans); btrfs_abort_transaction(trans, ret); goto out_wake_log_root; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ad9ae2489782..0e96633b8b4b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1547,6 +1547,91 @@ TRACE_EVENT(btrfs_log_new_name_exit, __entry->ino, __entry->old_dir_ino, __entry->ret) ); +/* Ideally call this while under root->log_mutex (but not always possible). 
*/ +TRACE_EVENT(btrfs_sync_log_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct btrfs_log_ctx *ctx), + + TP_ARGS(trans, root, ctx), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( int, ctx_log_transid ) + __field( int, root_log_transid ) + __field( int, log_transid_committed ) + __field( bool, log_committing ) + __field( bool, log_committing_prev ) + __field( int, log_writers ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->transid = trans->transid; + __entry->ctx_log_transid = ctx->log_transid; + __entry->root_log_transid = btrfs_get_root_log_transid(root); + __entry->log_transid_committed = + data_race(root->log_transid_committed); + __entry->log_committing = + atomic_read(&root->log_commit[ctx->log_transid % 2]); + __entry->log_committing_prev = + atomic_read(&root->log_commit[(ctx->log_transid + 1) % 2]); + __entry->log_writers = atomic_read(&root->log_writers); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_log_transid=%d" + " root_log_transid=%d log_transid_committed=%d" + " log_committing=%d log_committing_prev=%d log_writers=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_log_transid, __entry->root_log_transid, + __entry->log_transid_committed, __entry->log_committing, + __entry->log_committing_prev, __entry->log_writers) +); + +/* + * Ideally call this while under root->log_mutex and in the same critical + * section that calls the btrfs_sync_log_enter() trace event (though it's not + * always possible). 
+ */ +TRACE_EVENT(btrfs_sync_log_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct btrfs_log_ctx *ctx, + int ret), + + TP_ARGS(trans, root, ctx, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( int, ctx_log_transid ) + __field( int, root_log_transid ) + __field( int, log_transid_committed ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->transid = trans->transid; + __entry->ctx_log_transid = ctx->log_transid; + __entry->root_log_transid = btrfs_get_root_log_transid(root); + __entry->log_transid_committed = + data_race(root->log_transid_committed); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_log_transid=%d" + " root_log_transid=%d log_transid_committed=%d ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_log_transid, __entry->root_log_transid, + __entry->log_transid_committed, __entry->ret) +); + TRACE_EVENT(btrfs_sync_fs, TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), -- cgit v1.2.3 From d8576024fa1bee0e72e44ca8b5a6c95372717a99 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 May 2026 17:35:40 +0100 Subject: btrfs: tracepoints: show inode type in btrfs_sync_file_enter() event Print the type of the inode (directory, regular file, symlink, etc) to facilitate debugging. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0e96633b8b4b..726ca4ddb4d8 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -830,10 +830,11 @@ TRACE_EVENT(btrfs_sync_file_enter, TP_ARGS(file, datasync), TP_STRUCT__entry_btrfs( - __field( u64, ino ) - __field( u64, parent ) - __field( int, datasync ) - __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, parent ) + __field( int, datasync ) + __field( u64, root_objectid ) ), TP_fast_assign( @@ -846,13 +847,13 @@ TRACE_EVENT(btrfs_sync_file_enter, __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); __entry->datasync = datasync; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); + __entry->mode = inode->i_mode; ), - TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d", - show_root_type(__entry->root_objectid), - __entry->ino, - __entry->parent, - __entry->datasync) + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s parent=%llu datasync=%d", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->parent, + __entry->datasync) ); TRACE_EVENT(btrfs_sync_file_exit, -- cgit v1.2.3 From 5b65452756717e89ad2a1e8690701dc3a71f3ec6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 22 May 2026 11:02:44 +0200 Subject: btrfs: zoned: decode 'RECLAIM_ZONES' state in tracepoints Decode the 'RECLAIM_ZONES' state in tracepoints. As of now, only the numerical state is shown in the tracepoints.
Reviewed-by: Boris Burkov Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 726ca4ddb4d8..4c5c47c5edb7 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -114,6 +114,7 @@ struct btrfs_log_ctx; EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ EM( COMMIT_TRANS, "COMMIT_TRANS") \ + EM( RECLAIM_ZONES, "RECLAIM_ZONES") \ EMe(RESET_ZONES, "RESET_ZONES") #define TRANSACTION_STATES \ -- cgit v1.2.3