summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-09-15 15:50:03 +0300
committerChristian Brauner <brauner@kernel.org>2025-09-19 14:11:06 +0300
commit9426414f0d42f824892ecd4dccfebf8987084a41 (patch)
tree8ef299818292d567a88da80cf69905a7956a9712 /include
parent8f5ae30d69d7543eee0d70083daf4de8fe15d585 (diff)
parent0cee64c547e3c9cda646af3e075a64f445ee8148 (diff)
downloadlinux-9426414f0d42f824892ecd4dccfebf8987084a41.tar.xz
Merge patch series "writeback: Avoid lockups when switching inodes"
Jan Kara <jack@suse.cz> says: This patch series addresses lockups reported by users when a systemd unit that reads lots of files from a filesystem mounted with the lazytime mount option exits. See patch 3 for more details about the reproducer. There are two main reasons why switching many inodes between wbs is problematic: 1) Multiple workers will be spawned to do the switching, but they all contend on the same wb->list_lock, making all the parallelism pointless and just wasting time. 2) Sorting of the wb->b_dirty list by dirtied_time_when is inherently slow. Patches 1-3 address these problems; patch 4 adds a tracepoint for better observability of inode writeback switching. * patches from https://lore.kernel.org/20250912103522.2935-1-jack@suse.cz: "writeback: Add tracepoint to track pending inode switches"; "writeback: Avoid excessively long inode switching times"; "writeback: Avoid softlockup when switching many inodes"; "writeback: Avoid contention on wb->list_lock when switching inodes". Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/backing-dev-defs.h | 4
-rw-r--r-- include/linux/writeback.h | 2
-rw-r--r-- include/trace/events/writeback.h | 29
3 files changed, 35 insertions, 0 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2ad261082bba..c5c9d89c73ed 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -152,6 +152,10 @@ struct bdi_writeback {
struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
struct list_head b_attached; /* attached inodes, protected by list_lock */
struct list_head offline_node; /* anchored at offline_cgwbs */
+ struct work_struct switch_work; /* work used to perform inode switching
+ * to this wb */
+ struct llist_head switch_wbs_ctxs; /* queued contexts for
+ * writeback switching */
union {
struct work_struct release_work;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a2848d731a46..15a4bc4ab819 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
}
+void inode_switch_wbs_work_fn(struct work_struct *work);
+
#else /* CONFIG_CGROUP_WRITEBACK */
static inline void inode_attach_wb(struct inode *inode, struct folio *folio)
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 1e23919c0da9..c08aff044e80 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -213,6 +213,35 @@ TRACE_EVENT(inode_foreign_history,
)
);
+TRACE_EVENT(inode_switch_wbs_queue,
+ /* Tracepoint inode_switch_wbs_queue: logs an old_wb/new_wb pair together with a count. */
+ TP_PROTO(struct bdi_writeback *old_wb, struct bdi_writeback *new_wb,
+ unsigned int count),
+ /* NOTE(review): count is presumably the number of inodes queued for switching - confirm at the call site. */
+ TP_ARGS(old_wb, new_wb, count),
+ /* Recorded fields: 32-byte bdi name, the __trace_wb_assign_cgroup() value of each wb, and count. */
+ TP_STRUCT__entry(
+ __array(char, name, 32)
+ __field(ino_t, old_cgroup_ino)
+ __field(ino_t, new_cgroup_ino)
+ __field(unsigned int, count)
+ ),
+ /* The name is taken from old_wb->bdi only; the strscpy_pad() size matches the 32-byte name array. */
+ TP_fast_assign(
+ strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32);
+ __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
+ __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
+ __entry->count = count;
+ ),
+ /* The ino_t fields are cast to unsigned long so they match the %lu conversions below. */
+ TP_printk("bdi %s: old_cgroup_ino=%lu new_cgroup_ino=%lu count=%u",
+ __entry->name,
+ (unsigned long)__entry->old_cgroup_ino,
+ (unsigned long)__entry->new_cgroup_ino,
+ __entry->count
+ )
+);
+
TRACE_EVENT(inode_switch_wbs,
TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb,