diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 128 |
1 files changed, 21 insertions, 107 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 51e77d72068a..2be00e873e92 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -5,14 +5,11 @@ #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/string.h> #include <linux/backing-dev.h> -#include <linux/mpage.h> #include <linux/falloc.h> -#include <linux/swap.h> #include <linux/writeback.h> #include <linux/compat.h> #include <linux/slab.h> @@ -83,7 +80,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; @@ -135,8 +132,8 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct inode_defrag *defrag; u64 transid; int ret; @@ -185,7 +182,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int ret; if (!__need_auto_defrag(fs_info)) @@ -833,8 +830,7 @@ next_slot: btrfs_file_extent_num_bytes(leaf, fi); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); } else { /* can't happen */ BUG(); @@ -1133,7 +1129,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, int 
btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_path *path; @@ -1470,7 +1466,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 start_pos; u64 last_pos; int i; @@ -1526,7 +1522,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; @@ -1569,10 +1565,11 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, return ret; } -static noinline ssize_t __btrfs_buffered_write(struct file *file, - struct iov_iter *i, - loff_t pos) +static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, + struct iov_iter *i) { + struct file *file = iocb->ki_filp; + loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1804,7 +1801,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - loff_t pos = iocb->ki_pos; + loff_t pos; ssize_t written; ssize_t written_buffered; loff_t endbyte; @@ -1815,8 +1812,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (written < 0 || 
!iov_iter_count(from)) return written; - pos += written; - written_buffered = __btrfs_buffered_write(file, from, pos); + pos = iocb->ki_pos; + written_buffered = btrfs_buffered_write(iocb, from); if (written_buffered < 0) { err = written_buffered; goto out; @@ -1953,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from); } else { - num_written = __btrfs_buffered_write(file, from, pos); + num_written = btrfs_buffered_write(iocb, from); if (num_written > 0) iocb->ki_pos = pos + num_written; if (clean_page) @@ -2042,7 +2039,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_trans_handle *trans; struct btrfs_log_ctx ctx; int ret = 0, err; - bool full_sync = false; u64 len; /* @@ -2066,96 +2062,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) inode_lock(inode); atomic_inc(&root->log_batch); - full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + /* - * We might have have had more pages made dirty after calling - * start_ordered_ops and before acquiring the inode's i_mutex. + * We have to do this here to avoid the priority inversion of waiting on + * IO of a lower priority task while holding a transaction open. */ - if (full_sync) { - /* - * For a full sync, we need to make sure any ordered operations - * start and finish before we start logging the inode, so that - * all extents are persisted and the respective file extent - * items are in the fs/subvol btree. - */ - ret = btrfs_wait_ordered_range(inode, start, len); - } else { - /* - * Start any new ordered operations before starting to log the - * inode. We will wait for them to finish in btrfs_sync_log(). 
- * - * Right before acquiring the inode's mutex, we might have new - * writes dirtying pages, which won't immediately start the - * respective ordered operations - that is done through the - * fill_delalloc callbacks invoked from the writepage and - * writepages address space operations. So make sure we start - * all ordered operations before starting to log our inode. Not - * doing this means that while logging the inode, writeback - * could start and invoke writepage/writepages, which would call - * the fill_delalloc callbacks (cow_file_range, - * submit_compressed_extents). These callbacks add first an - * extent map to the modified list of extents and then create - * the respective ordered operation, which means in - * tree-log.c:btrfs_log_inode() we might capture all existing - * ordered operations (with btrfs_get_logged_extents()) before - * the fill_delalloc callback adds its ordered operation, and by - * the time we visit the modified list of extent maps (with - * btrfs_log_changed_extents()), we see and process the extent - * map they created. We then use the extent map to construct a - * file extent item for logging without waiting for the - * respective ordered operation to finish - this file extent - * item points to a disk location that might not have yet been - * written to, containing random data - so after a crash a log - * replay will make our inode have file extent items that point - * to disk locations containing invalid data, as we returned - * success to userspace without waiting for the respective - * ordered operation to finish, because it wasn't captured by - * btrfs_get_logged_extents(). 
- */ - ret = start_ordered_ops(inode, start, end); - } + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { inode_unlock(inode); goto out; } atomic_inc(&root->log_batch); - /* - * If the last transaction that changed this file was before the current - * transaction and we have the full sync flag set in our inode, we can - * bail out now without any syncing. - * - * Note that we can't bail out if the full sync flag isn't set. This is - * because when the full sync flag is set we start all ordered extents - * and wait for them to fully complete - when they complete they update - * the inode's last_trans field through: - * - * btrfs_finish_ordered_io() -> - * btrfs_update_inode_fallback() -> - * btrfs_update_inode() -> - * btrfs_set_inode_last_trans() - * - * So we are sure that last_trans is up to date and can do this check to - * bail out safely. For the fast path, when the full sync flag is not - * set in our inode, we can not do it because we start only our ordered - * extents and don't wait for them to complete (that is when - * btrfs_finish_ordered_io runs), so here at this point their last_trans - * value might be less than or equals to fs_info->last_trans_committed, - * and setting a speculative last_trans for an inode when a buffered - * write is made (such as fs_info->generation + 1 for example) would not - * be reliable since after setting the value and before fsync is called - * any number of transactions can start and commit (transaction kthread - * commits the current transaction periodically), and a transaction - * commit does not start nor waits for ordered extents to complete. 
- */ smp_mb(); if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || - (full_sync && BTRFS_I(inode)->last_trans <= - fs_info->last_trans_committed) || - (!btrfs_have_ordered_extents_in_range(inode, start, len) && - BTRFS_I(inode)->last_trans - <= fs_info->last_trans_committed)) { + BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2239,13 +2160,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } } - if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, len); - if (ret) { - btrfs_end_transaction(trans); - goto out; - } - } ret = btrfs_commit_transaction(trans); } else { ret = btrfs_end_transaction(trans); @@ -2310,7 +2224,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, u64 offset, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; |