summaryrefslogtreecommitdiff
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:59:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:59:25 +0300
commitf3cdc8ae116e27d84e1f33c7a2995960cebb73ac (patch)
treedb3dbbbbf82b76590f601b5caee5de3bef151c4b /fs/btrfs/file.c
parent8eeae5bae1239c030ba0b34cac97ebd5e7ec1886 (diff)
parent2166e5edce9ac1edf3b113d6091ef72fcac2d6c4 (diff)
downloadlinux-f3cdc8ae116e27d84e1f33c7a2995960cebb73ac.tar.xz
Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Highlights: - speedup dead root detection during orphan cleanup, eg. when there are many deleted subvolumes waiting to be cleaned, the trees are now looked up in radix tree instead of a O(N^2) search - snapshot creation with inherited qgroup will mark the qgroup inconsistent, requires a rescan - send will emit file capabilities after chown, this produces a stream that does not need postprocessing to set the capabilities again - direct io ported to iomap infrastructure, cleaned up and simplified code, notably removing last use of struct buffer_head in btrfs code Core changes: - factor out backreference iteration, to be used by ordinary backreferences and relocation code - improved global block reserve utilization * better logic to serialize requests * increased maximum available for unlink * improved handling on large pages (64K) - direct io cleanups and fixes * simplify layering, where cloned bios were unnecessarily created for some cases * error handling fixes (submit, endio) * remove repair worker thread, used to avoid deadlocks during repair - refactored block group reading code, preparatory work for new type of block group storage that should improve mount time on large filesystems Cleanups: - cleaned up (and slightly sped up) set/get helpers for metadata data structure members - root bit REF_COWS got renamed to SHAREABLE to reflect the that the blocks of the tree get shared either among subvolumes or with the relocation trees Fixes: - when subvolume deletion fails due to ENOSPC, the filesystem is not turned read-only - device scan deals with devices from other filesystems that changed ownership due to overwrite (mkfs) - fix a race between scrub and block group removal/allocation - fix long standing bug of a runaway balance operation, printing the same line to the syslog, caused by a stale status bit on a reloc tree that prevented progress - fix corrupt log due to concurrent fsync of inodes with shared extents - fix space underflow for NODATACOW and buffered writes when it for some reason needs to fallback to COW mode" * tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits) btrfs: fix space_info bytes_may_use underflow during space cache writeout btrfs: fix space_info bytes_may_use underflow after nocow buffered write btrfs: fix wrong file range cleanup after an error filling dealloc range btrfs: remove redundant local variable in read_block_for_search btrfs: open code key_search btrfs: split btrfs_direct_IO to read and write part btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK fs: remove dio_end_io() btrfs: switch to iomap_dio_rw() for dio iomap: remove lockdep_assert_held() iomap: add a filesystem hook for direct I/O bio submission fs: export generic_file_buffered_read() btrfs: turn space cache writeout failure messages into debug messages btrfs: include error on messages about failure to write space/inode caches btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks() btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums btrfs: make checksum item extension more efficient btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents btrfs: unexport btrfs_compress_set_level() btrfs: simplify iget helpers ...
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c111
1 files changed, 94 insertions, 17 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 719e68ab552c..fde125616687 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -275,26 +275,18 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
{
struct btrfs_root *inode_root;
struct inode *inode;
- struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
int num_defrag;
int ret;
/* get the inode */
- key.objectid = defrag->root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- inode_root = btrfs_get_fs_root(fs_info, &key, true);
+ inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
if (IS_ERR(inode_root)) {
ret = PTR_ERR(inode_root);
goto cleanup;
}
- key.objectid = defrag->ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, inode_root);
+ inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
btrfs_put_root(inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
@@ -775,7 +767,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
- update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
root == fs_info->tree_root);
while (1) {
recow = 0;
@@ -1817,21 +1809,61 @@ again:
return num_written ? num_written : ret;
}
-static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+{
+ const unsigned int blocksize_mask = fs_info->sectorsize - 1;
+
+ if (offset & blocksize_mask)
+ return -EINVAL;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ return -EINVAL;
+
+ return 0;
+}
+
+static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- loff_t pos;
- ssize_t written;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
ssize_t written_buffered;
loff_t endbyte;
int err;
+ size_t count = 0;
+ bool relock = false;
- written = generic_file_direct_write(iocb, from);
+ if (check_direct_IO(fs_info, from, pos))
+ goto buffered;
+
+ count = iov_iter_count(from);
+ /*
+ * If the write DIO is beyond the EOF, we need update the isize, but it
+ * is protected by i_mutex. So we can not unlock the i_mutex at this
+ * case.
+ */
+ if (pos + count <= inode->i_size) {
+ inode_unlock(inode);
+ relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ return -EAGAIN;
+ }
+
+ down_read(&BTRFS_I(inode)->dio_sem);
+ written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops,
+ is_sync_kiocb(iocb));
+ up_read(&BTRFS_I(inode)->dio_sem);
+
+ if (relock)
+ inode_lock(inode);
if (written < 0 || !iov_iter_count(from))
return written;
+buffered:
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
@@ -1970,7 +2002,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
- num_written = __btrfs_direct_write(iocb, from);
+ num_written = btrfs_direct_write(iocb, from);
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
@@ -3484,9 +3516,54 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp);
}
+static int check_direct_read(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+{
+ int ret;
+ int i, seg;
+
+ ret = check_direct_IO(fs_info, iter, offset);
+ if (ret < 0)
+ return ret;
+
+ for (seg = 0; seg < iter->nr_segs; seg++)
+ for (i = seg + 1; i < iter->nr_segs; i++)
+ if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+ return -EINVAL;
+ return 0;
+}
+
+static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;
+
+ if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+ return 0;
+
+ inode_lock_shared(inode);
+ ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dops,
+ is_sync_kiocb(iocb));
+ inode_unlock_shared(inode);
+ return ret;
+}
+
+static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ ssize_t ret = 0;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = btrfs_direct_read(iocb, to);
+ if (ret < 0)
+ return ret;
+ }
+
+ return generic_file_buffered_read(iocb, to, ret);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read_iter = generic_file_read_iter,
+ .read_iter = btrfs_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,