diff options
| author | Christian Brauner <brauner@kernel.org> | 2025-09-15 15:50:03 +0300 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-09-19 14:11:06 +0300 |
| commit | 9426414f0d42f824892ecd4dccfebf8987084a41 (patch) | |
| tree | 8ef299818292d567a88da80cf69905a7956a9712 /include | |
| parent | 8f5ae30d69d7543eee0d70083daf4de8fe15d585 (diff) | |
| parent | 0cee64c547e3c9cda646af3e075a64f445ee8148 (diff) | |
| download | linux-9426414f0d42f824892ecd4dccfebf8987084a41.tar.xz | |
Merge patch series "writeback: Avoid lockups when switching inodes"
Jan Kara <jack@suse.cz> says:
This patch series addresses lockups reported by users when systemd unit reading
lots of files from a filesystem mounted with lazytime mount option exits. See
patch 3 for more details about the reproducer.
There are two main issues why switching many inodes between wbs:
1) Multiple workers will be spawned to do the switching but they all contend
on the same wb->list_lock making all the parallelism pointless and just
wasting time.
2) Sorting of wb->b_dirty list by dirtied_time_when is inherently slow.
Patches 1-3 address these problems, patch 4 adds a tracepoint for better
observability of inode writeback switching.
* patches from https://lore.kernel.org/20250912103522.2935-1-jack@suse.cz:
writeback: Add tracepoint to track pending inode switches
writeback: Avoid excessively long inode switching times
writeback: Avoid softlockup when switching many inodes
writeback: Avoid contention on wb->list_lock when switching inodes
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/backing-dev-defs.h | 4 | ||||
| -rw-r--r-- | include/linux/writeback.h | 2 | ||||
| -rw-r--r-- | include/trace/events/writeback.h | 29 |
3 files changed, 35 insertions, 0 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 2ad261082bba..c5c9d89c73ed 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -152,6 +152,10 @@ struct bdi_writeback { struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head b_attached; /* attached inodes, protected by list_lock */ struct list_head offline_node; /* anchored at offline_cgwbs */ + struct work_struct switch_work; /* work used to perform inode switching + * to this wb */ + struct llist_head switch_wbs_ctxs; /* queued contexts for + * writeback switching */ union { struct work_struct release_work; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2848d731a46..15a4bc4ab819 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -265,6 +265,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } +void inode_switch_wbs_work_fn(struct work_struct *work); + #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 1e23919c0da9..c08aff044e80 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -213,6 +213,35 @@ TRACE_EVENT(inode_foreign_history, ) ); +TRACE_EVENT(inode_switch_wbs_queue, + + TP_PROTO(struct bdi_writeback *old_wb, struct bdi_writeback *new_wb, + unsigned int count), + + TP_ARGS(old_wb, new_wb, count), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(ino_t, old_cgroup_ino) + __field(ino_t, new_cgroup_ino) + __field(unsigned int, count) + ), + + TP_fast_assign( + strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); + __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); + __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); + __entry->count = count; + ), + + TP_printk("bdi %s: old_cgroup_ino=%lu new_cgroup_ino=%lu count=%u", + __entry->name, + (unsigned long)__entry->old_cgroup_ino, + (unsigned long)__entry->new_cgroup_ino, + __entry->count + ) +); + TRACE_EVENT(inode_switch_wbs, TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb, |
