diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 184 |
1 files changed, 125 insertions, 59 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2c14312b05e8..bb824c7cb7c7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -500,18 +500,18 @@ next: * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. */ -int btrfs_dirty_pages(struct inode *inode, struct page **pages, +int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; int err = 0; int i; u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - loff_t isize = i_size_read(inode); + loff_t isize = i_size_read(&inode->vfs_inode); unsigned int extra_bits = 0; start_pos = pos & ~((u64) fs_info->sectorsize - 1); @@ -524,13 +524,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * The pages may have already been dirty, clear out old accounting so * we can set things up properly */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, end_of_last_block, + clear_extent_bit(&inode->io_tree, start_pos, end_of_last_block, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached); - if (!btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (!btrfs_is_free_space_inode(inode)) { if (start_pos >= isize && - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) { + !(inode->flags & BTRFS_INODE_PREALLOC)) { /* * There can't be any extents following eof in this case * so just set the delalloc new bit for the range @@ -538,8 +538,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, */ extra_bits |= EXTENT_DELALLOC_NEW; } else { - err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode), - start_pos, + err = btrfs_find_new_delalloc_bytes(inode, start_pos, num_bytes, cached); if (err) return err; @@ -564,7 +563,7 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * at this time. */ if (end_pos > isize) - i_size_write(inode, end_pos); + i_size_write(&inode->vfs_inode, end_pos); return 0; } @@ -731,7 +730,7 @@ next: * is deleted from the tree. */ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, u64 start, u64 end, u64 *drop_end, int drop_cache, int replace_extent, @@ -744,7 +743,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_ref ref = { 0 }; struct btrfs_key key; struct btrfs_key new_key; - u64 ino = btrfs_ino(BTRFS_I(inode)); + struct inode *vfs_inode = &inode->vfs_inode; + u64 ino = btrfs_ino(inode); u64 search_start = start; u64 disk_bytenr = 0; u64 num_bytes = 0; @@ -762,9 +762,9 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, int leafs_visited = 0; if (drop_cache) - btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0); + btrfs_drop_extent_cache(inode, start, end - 1, 0); - if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) + if (start >= inode->disk_i_size && !replace_extent) modify_tree = 0; update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || @@ -935,7 +935,7 @@ next_slot: extent_end - end); btrfs_mark_buffer_dirty(leaf); if (update_refs && disk_bytenr > 0) - inode_sub_bytes(inode, end - key.offset); + inode_sub_bytes(vfs_inode, end - key.offset); break; } @@ -955,7 +955,7 @@ next_slot: start - key.offset); btrfs_mark_buffer_dirty(leaf); if (update_refs && disk_bytenr > 0) - inode_sub_bytes(inode, extent_end - start); + inode_sub_bytes(vfs_inode, extent_end - start); if (end == extent_end) break; @@ -979,7 +979,7 @@ delete_extent_item: if (update_refs && extent_type == BTRFS_FILE_EXTENT_INLINE) { - inode_sub_bytes(inode, + inode_sub_bytes(vfs_inode, extent_end - key.offset); extent_end = ALIGN(extent_end, fs_info->sectorsize); @@ -993,7 +993,7 @@ delete_extent_item: key.offset - extent_offset); ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); /* -ENOMEM */ - inode_sub_bytes(inode, + inode_sub_bytes(vfs_inode, extent_end - key.offset); } @@ -1082,8 +1082,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, - drop_cache, 0, 0, NULL); + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, start, + end, NULL, drop_cache, 0, 0, NULL); btrfs_free_path(path); return ret; } @@ -1532,8 +1532,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, return ret; } -static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, - size_t *write_bytes) +static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes, bool nowait) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; @@ -1541,32 +1541,87 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, u64 num_bytes; int ret; - if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) + if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) + return 0; + + if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; + num_bytes = lockend - lockstart + 1; - btrfs_lock_and_flush_ordered_range(inode, lockstart, - lockend, NULL); + if (nowait) { + struct btrfs_ordered_extent *ordered; + + if (!try_lock_extent(&inode->io_tree, lockstart, lockend)) + return -EAGAIN; + + ordered = btrfs_lookup_ordered_range(inode, lockstart, + num_bytes); + if (ordered) { + btrfs_put_ordered_extent(ordered); + ret = -EAGAIN; + goto out_unlock; + } + } else { + btrfs_lock_and_flush_ordered_range(inode, lockstart, + lockend, NULL); + } - num_bytes = lockend - lockstart + 1; ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, NULL, NULL, NULL); if (ret <= 0) { ret = 0; - btrfs_drew_write_unlock(&root->snapshot_lock); + if (!nowait) + btrfs_drew_write_unlock(&root->snapshot_lock); } else { *write_bytes = min_t(size_t, *write_bytes , num_bytes - pos + lockstart); } - +out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend); return ret; } +static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes) +{ + return check_can_nocow(inode, pos, write_bytes, true); +} + +/* + * Check if we can do nocow write into the range [@pos, @pos + @write_bytes) + * + * @pos: File offset + * @write_bytes: The length to write, will be updated to the nocow writeable + * range + * + * This function will flush ordered extents in the range to ensure proper + * nocow checks. + * + * Return: + * >0 and update @write_bytes if we can do nocow write + * 0 if we can't do nocow write + * -EAGAIN if we can't get the needed lock or there are ordered extents + * for * (nowait == true) case + * <0 if other error happened + * + * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock(). + */ +int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, + size_t *write_bytes) +{ + return check_can_nocow(inode, pos, write_bytes, false); +} + +void btrfs_check_nocow_unlock(struct btrfs_inode *inode) +{ + btrfs_drew_write_unlock(&inode->root->snapshot_lock); +} + static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) { @@ -1574,7 +1629,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; struct extent_changeset *data_reserved = NULL; u64 release_bytes = 0; @@ -1627,13 +1681,12 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, fs_info->sectorsize); extent_changeset_release(data_reserved); - ret = btrfs_check_data_free_space(inode, &data_reserved, pos, + ret = btrfs_check_data_free_space(BTRFS_I(inode), + &data_reserved, pos, write_bytes); if (ret < 0) { - if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) && - check_can_nocow(BTRFS_I(inode), pos, - &write_bytes) > 0) { + if (btrfs_check_nocow_lock(BTRFS_I(inode), pos, + &write_bytes) > 0) { /* * For nodata cow case, no need to reserve * data space. @@ -1658,11 +1711,11 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, reserve_bytes); if (ret) { if (!only_release_metadata) - btrfs_free_reserved_data_space(inode, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, pos, write_bytes); else - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); break; } @@ -1732,7 +1785,7 @@ again: __pos = round_down(pos, fs_info->sectorsize) + (dirty_pages << PAGE_SHIFT); - btrfs_delalloc_release_space(inode, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, __pos, release_bytes, true); } @@ -1742,8 +1795,9 @@ again: fs_info->sectorsize); if (copied > 0) - ret = btrfs_dirty_pages(inode, pages, dirty_pages, - pos, copied, &cached_state); + ret = btrfs_dirty_pages(BTRFS_I(inode), pages, + dirty_pages, pos, copied, + &cached_state); /* * If we have not locked the extent range, because the range's @@ -1766,7 +1820,7 @@ again: release_bytes = 0; if (only_release_metadata) - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); if (only_release_metadata && copied > 0) { lockstart = round_down(pos, @@ -1784,8 +1838,6 @@ again: cond_resched(); balance_dirty_pages_ratelimited(inode->i_mapping); - if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1) - btrfs_btree_balance_dirty(fs_info); pos += copied; num_written += copied; @@ -1795,11 +1847,12 @@ again: if (release_bytes) { if (only_release_metadata) { - btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_check_nocow_unlock(BTRFS_I(inode)); btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes, true); } else { - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), + data_reserved, round_down(pos, fs_info->sectorsize), release_bytes, true); } @@ -1904,13 +1957,23 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, pos = iocb->ki_pos; count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { + size_t nocow_bytes = count; + /* * We will allocate space in case nodatacow is not set, * so bail */ - if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | - BTRFS_INODE_PREALLOC)) || - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) { + if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) + <= 0) { + inode_unlock(inode); + return -EAGAIN; + } + /* + * There are holes in the range or parts of the range that must + * be COWed (shared extents, RO block groups, etc), so just bail + * out. + */ + if (nocow_bytes < count) { inode_unlock(inode); return -EAGAIN; } @@ -2570,7 +2633,7 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path, cur_offset = start; while (cur_offset < end) { - ret = __btrfs_drop_extents(trans, root, inode, path, + ret = __btrfs_drop_extents(trans, root, BTRFS_I(inode), path, cur_offset, end + 1, &drop_end, 1, 0, 0, NULL); if (ret != -ENOSPC) { @@ -3148,14 +3211,14 @@ reserve_space: if (ret < 0) goto out; space_reserved = true; - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, - alloc_start, bytes_to_reserve); - if (ret) - goto out; ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); if (ret) goto out; + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, + alloc_start, bytes_to_reserve); + if (ret) + goto out; ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), @@ -3171,7 +3234,7 @@ reserve_space: ret = btrfs_fallocate_update_isize(inode, offset + len, mode); out: if (ret && space_reserved) - btrfs_free_reserved_data_space(inode, data_reserved, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, alloc_start, bytes_to_reserve); extent_changeset_free(data_reserved); @@ -3322,8 +3385,9 @@ static long btrfs_fallocate(struct file *file, int mode, free_extent_map(em); break; } - ret = btrfs_qgroup_reserve_data(inode, &data_reserved, - cur_offset, last_byte - cur_offset); + ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), + &data_reserved, cur_offset, + last_byte - cur_offset); if (ret < 0) { cur_offset = last_byte; free_extent_map(em); @@ -3335,8 +3399,9 @@ static long btrfs_fallocate(struct file *file, int mode, * range, free reserved data space first, otherwise * it'll result in false ENOSPC error. */ - btrfs_free_reserved_data_space(inode, data_reserved, - cur_offset, last_byte - cur_offset); + btrfs_free_reserved_data_space(BTRFS_I(inode), + data_reserved, cur_offset, + last_byte - cur_offset); } free_extent_map(em); cur_offset = last_byte; @@ -3353,7 +3418,7 @@ static long btrfs_fallocate(struct file *file, int mode, range->len, i_blocksize(inode), offset + len, &alloc_hint); else - btrfs_free_reserved_data_space(inode, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, range->start, range->len); list_del(&range->list); @@ -3374,7 +3439,7 @@ out: inode_unlock(inode); /* Let go of our reservation. */ if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) - btrfs_free_reserved_data_space(inode, data_reserved, + btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved, cur_offset, alloc_end - cur_offset); extent_changeset_free(data_reserved); return ret; @@ -3472,7 +3537,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) static int btrfs_file_open(struct inode *inode, struct file *filp) { - filp->f_mode |= FMODE_NOWAIT; + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; return generic_file_open(inode, filp); } @@ -3481,6 +3546,7 @@ const struct file_operations btrfs_file_operations = { .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, + .splice_write = iter_file_splice_write, .mmap = btrfs_file_mmap, .open = btrfs_file_open, .release = btrfs_release_file, |