From d4135134ab8feb994369d44884733e8031b0f800 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 23 Mar 2022 16:19:30 +0000 Subject: btrfs: avoid blocking on space revervation when doing nowait dio writes When doing a NOWAIT direct IO write, if we can NOCOW then it means we can proceed with the non-blocking, NOWAIT path. However reserving the metadata space and qgroup meta space can often result in blocking - flushing delalloc, wait for ordered extents to complete, trigger transaction commits, etc, going against the semantics of a NOWAIT write. So make the NOWAIT write path to try to reserve all the metadata it needs without resulting in a blocking behaviour - if we get -ENOSPC or -EDQUOT then return -EAGAIN to make the caller fallback to a blocking direct IO write. This is part of a patchset comprised of the following patches: btrfs: avoid blocking on page locks with nowait dio on compressed range btrfs: avoid blocking nowait dio when locking file range btrfs: avoid double nocow check when doing nowait dio writes btrfs: stop allocating a path when checking if cross reference exists btrfs: free path at can_nocow_extent() before checking for checksum items btrfs: release path earlier at can_nocow_extent() btrfs: avoid blocking when allocating context for nowait dio read/write btrfs: avoid blocking on space revervation when doing nowait dio writes The following test was run before and after applying this patchset: $ cat io-uring-nodatacow-test.sh #!/bin/bash DEV=/dev/sdc MNT=/mnt/sdc MOUNT_OPTIONS="-o ssd -o nodatacow" MKFS_OPTIONS="-R free-space-tree -O no-holes" NUM_JOBS=4 FILE_SIZE=8G RUN_TIME=300 cat < /tmp/fio-job.ini [io_uring_rw] rw=randrw fsync=0 fallocate=posix group_reporting=1 direct=1 ioengine=io_uring iodepth=64 bssplit=4k/20:8k/20:16k/20:32k/10:64k/10:128k/5:256k/5:512k/5:1m/5 filesize=$FILE_SIZE runtime=$RUN_TIME time_based filename=foobar directory=$MNT numjobs=$NUM_JOBS thread EOF echo performance | \ tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor umount $MNT &> /dev/null mkfs.btrfs -f $MKFS_OPTIONS $DEV &> /dev/null mount $MOUNT_OPTIONS $DEV $MNT fio /tmp/fio-job.ini umount $MNT The test was run a 12 cores box with 64G of ram, using a non-debug kernel config (Debian's default config) and a spinning disk. Result before the patchset: READ: bw=407MiB/s (427MB/s), 407MiB/s-407MiB/s (427MB/s-427MB/s), io=119GiB (128GB), run=300175-300175msec WRITE: bw=407MiB/s (427MB/s), 407MiB/s-407MiB/s (427MB/s-427MB/s), io=119GiB (128GB), run=300175-300175msec Result after the patchset: READ: bw=436MiB/s (457MB/s), 436MiB/s-436MiB/s (457MB/s-457MB/s), io=128GiB (137GB), run=300044-300044msec WRITE: bw=435MiB/s (456MB/s), 435MiB/s-435MiB/s (456MB/s-456MB/s), io=128GiB (137GB), run=300044-300044msec That's about +7.2% throughput for reads and +6.9% for writes. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4254c3c7b9f7..b3f2010d9df5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4705,7 +4705,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, goto out; } } - ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize); + ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize, false); if (ret < 0) { if (!only_release_metadata) btrfs_free_reserved_data_space(inode, data_reserved, @@ -7415,6 +7415,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, u64 start, u64 len, unsigned int iomap_flags) { + const bool nowait = (iomap_flags & IOMAP_NOWAIT); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_map *em = *map; int type; @@ -7454,12 +7455,15 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, struct extent_map *em2; /* We can NOCOW, so only need to reserve metadata space. */ - ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len, + nowait); if (ret < 0) { /* Our caller expects us to free the input extent map. */ free_extent_map(em); *map = NULL; btrfs_dec_nocow_writers(fs_info, block_start); + if (nowait && (ret == -ENOSPC || ret == -EDQUOT)) + ret = -EAGAIN; goto out; } space_reserved = true; @@ -7483,7 +7487,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, free_extent_map(em); *map = NULL; - if (iomap_flags & IOMAP_NOWAIT) + if (nowait) return -EAGAIN; /* We have to COW, so need to reserve metadata and data space. */ @@ -10801,7 +10805,8 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, num_bytes); if (ret) goto out_free_data_space; - ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes); + ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes, + false); if (ret) goto out_qgroup_free_data; -- cgit v1.2.3