summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorKemeng Shi <shikemeng@huaweicloud.com>2024-04-23 06:46:39 +0300
committerAndrew Morton <akpm@linux-foundation.org>2024-05-06 03:53:51 +0300
commite32e27009fb0d42c9ce9e01c5d96769897d1fa70 (patch)
tree9966d1f9953e11ad9732ce4a676e9131b23a9ed0 /mm
parent4673ad3bdca2678a8970c0076aa07f5302d914fb (diff)
downloadlinux-e32e27009fb0d42c9ce9e01c5d96769897d1fa70.tar.xz
writeback: collect stats of all wb of bdi in bdi_debug_stats_show
Patch series "Improve visibility of writeback", v5. This series tries to improve visilibity of writeback. Patch 1 make /sys/kernel/debug/bdi/xxx/stats show writeback info of whole bdi instead of only writeback info in root cgroup. Patch 2 add a new debug file /sys/kernel/debug/bdi/xxx/wb_stats to show per wb writeback info. Patch 3 add wb_monitor.py to monitor basic writeback info of running system, more info could be added on demand. Patch 4 is a random cleanup. More details can be found in respective patches. Following domain hierarchy is tested: global domain (320G) / \ cgroup domain1(10G) cgroup domain2(10G) | | bdi wb1 wb2 /* all writeback info of bdi is successfully collected */ cat stats BdiWriteback: 4704 kB BdiReclaimable: 1294496 kB BdiDirtyThresh: 204208088 kB DirtyThresh: 195259944 kB BackgroundThresh: 32503588 kB BdiDirtied: 48519296 kB BdiWritten: 47225696 kB BdiWriteBandwidth: 1173892 kBps b_dirty: 1 b_io: 0 b_more_io: 1 b_dirty_time: 0 bdi_list: 1 state: 1 /* per wb writeback info of bdi is collected */ cat /sys/kernel/debug/bdi/252:16/wb_stats WbCgIno: 1 WbWriteback: 0 kB WbReclaimable: 0 kB WbDirtyThresh: 0 kB WbDirtied: 0 kB WbWritten: 0 kB WbWriteBandwidth: 102400 kBps b_dirty: 0 b_io: 0 b_more_io: 0 b_dirty_time: 0 state: 1 WbCgIno: 4208 WbWriteback: 59808 kB WbReclaimable: 676480 kB WbDirtyThresh: 6004624 kB WbDirtied: 23348192 kB WbWritten: 22614592 kB WbWriteBandwidth: 593204 kBps b_dirty: 1 b_io: 1 b_more_io: 0 b_dirty_time: 0 state: 7 WbCgIno: 4249 WbWriteback: 144256 kB WbReclaimable: 432096 kB WbDirtyThresh: 6004344 kB WbDirtied: 25727744 kB WbWritten: 25154752 kB WbWriteBandwidth: 577904 kBps b_dirty: 0 b_io: 1 b_more_io: 0 b_dirty_time: 0 state: 7 The wb_monitor.py script output is as following: ./wb_monitor.py 252:16 -c writeback reclaimable dirtied written avg_bw 252:16_1 0 0 0 0 102400 252:16_4284 672 820064 9230368 8410304 685612 252:16_4325 896 819840 10491264 9671648 652348 252:16 1568 1639904 19721632 18081952 1440360 writeback reclaimable dirtied written avg_bw 252:16_1 0 0 0 0 102400 252:16_4284 672 820064 9230368 8410304 685612 252:16_4325 896 819840 10491264 9671648 652348 252:16 1568 1639904 19721632 18081952 1440360 ... This patch (of 5): /sys/kernel/debug/bdi/xxx/stats is supposed to show writeback information of whole bdi, but only writeback information of bdi in root cgroup is collected. So writeback information in non-root cgroup are missing now. To be more specific, considering following case: /* create writeback cgroup */ cd /sys/fs/cgroup echo "+memory +io" > cgroup.subtree_control mkdir group1 cd group1 echo $$ > cgroup.procs /* do writeback in cgroup */ fio -name test -filename=/dev/vdb ... /* get writeback info of bdi */ cat /sys/kernel/debug/bdi/xxx/stats The cat result unexpectedly implies that there is no writeback on target bdi. Fix this by collecting stats of all wb in bdi instead of only wb in root cgroup. Following domain hierarchy is tested: global domain (320G) / \ cgroup domain1(10G) cgroup domain2(10G) | | bdi wb1 wb2 /* all writeback info of bdi is successfully collected */ cat stats BdiWriteback: 2912 kB BdiReclaimable: 1598464 kB BdiDirtyThresh: 167479028 kB DirtyThresh: 195038532 kB BackgroundThresh: 32466728 kB BdiDirtied: 19141696 kB BdiWritten: 17543456 kB BdiWriteBandwidth: 1136172 kBps b_dirty: 2 b_io: 0 b_more_io: 1 b_dirty_time: 0 bdi_list: 1 state: 1 Link: https://lkml.kernel.org/r/20240423034643.141219-1-shikemeng@huaweicloud.com Link: https://lkml.kernel.org/r/20240423034643.141219-2-shikemeng@huaweicloud.com Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com> Acked-by: Tejun Heo <tj@kernel.org> Cc: Brian Foster <bfoster@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: David Sterba <dsterba@suse.com> Cc: Jan Kara <jack@suse.cz> Cc: Mateusz Guzik <mjguzik@gmail.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: SeongJae Park <sj@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c96
1 files changed, 73 insertions, 23 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 5fa3666356f9..089146feb830 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -39,6 +39,19 @@ struct workqueue_struct *bdi_wq;
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+struct wb_stats {
+ unsigned long nr_dirty;
+ unsigned long nr_io;
+ unsigned long nr_more_io;
+ unsigned long nr_dirty_time;
+ unsigned long nr_writeback;
+ unsigned long nr_reclaimable;
+ unsigned long nr_dirtied;
+ unsigned long nr_written;
+ unsigned long dirty_thresh;
+ unsigned long wb_thresh;
+};
+
static struct dentry *bdi_debug_root;
static void bdi_debug_init(void)
@@ -46,31 +59,68 @@ static void bdi_debug_init(void)
bdi_debug_root = debugfs_create_dir("bdi", NULL);
}
-static int bdi_debug_stats_show(struct seq_file *m, void *v)
+static void collect_wb_stats(struct wb_stats *stats,
+ struct bdi_writeback *wb)
{
- struct backing_dev_info *bdi = m->private;
- struct bdi_writeback *wb = &bdi->wb;
- unsigned long background_thresh;
- unsigned long dirty_thresh;
- unsigned long wb_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io_list)
- nr_dirty++;
+ stats->nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_io_list)
- nr_io++;
+ stats->nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_io_list)
- nr_more_io++;
+ stats->nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
if (inode->i_state & I_DIRTY_TIME)
- nr_dirty_time++;
+ stats->nr_dirty_time++;
spin_unlock(&wb->list_lock);
+ stats->nr_writeback += wb_stat(wb, WB_WRITEBACK);
+ stats->nr_reclaimable += wb_stat(wb, WB_RECLAIMABLE);
+ stats->nr_dirtied += wb_stat(wb, WB_DIRTIED);
+ stats->nr_written += wb_stat(wb, WB_WRITTEN);
+ stats->wb_thresh += wb_calc_thresh(wb, stats->dirty_thresh);
+}
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ struct bdi_writeback *wb;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+ if (!wb_tryget(wb))
+ continue;
+
+ collect_wb_stats(stats, wb);
+ wb_put(wb);
+ }
+ rcu_read_unlock();
+}
+#else
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ collect_wb_stats(stats, &bdi->wb);
+}
+#endif
+
+static int bdi_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct wb_stats stats;
+ unsigned long tot_bw;
+
global_dirty_limits(&background_thresh, &dirty_thresh);
- wb_thresh = wb_calc_thresh(wb, dirty_thresh);
+
+ memset(&stats, 0, sizeof(stats));
+ stats.dirty_thresh = dirty_thresh;
+ bdi_collect_stats(bdi, &stats);
+ tot_bw = atomic_long_read(&bdi->tot_write_bandwidth);
seq_printf(m,
"BdiWriteback: %10lu kB\n"
@@ -87,18 +137,18 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"b_dirty_time: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
- (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
- (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
- K(wb_thresh),
+ K(stats.nr_writeback),
+ K(stats.nr_reclaimable),
+ K(stats.wb_thresh),
K(dirty_thresh),
K(background_thresh),
- (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
- (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
- (unsigned long) K(wb->write_bandwidth),
- nr_dirty,
- nr_io,
- nr_more_io,
- nr_dirty_time,
+ K(stats.nr_dirtied),
+ K(stats.nr_written),
+ K(tot_bw),
+ stats.nr_dirty,
+ stats.nr_io,
+ stats.nr_more_io,
+ stats.nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->wb.state);
return 0;