summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/block-group.c8
-rw-r--r--fs/btrfs/compression.c49
-rw-r--r--fs/btrfs/disk-io.c26
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c9
-rw-r--r--fs/btrfs/file-item.c108
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/inode.c22
-rw-r--r--fs/btrfs/reflink.c33
-rw-r--r--fs/btrfs/tree-log.c57
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/zoned.c27
-rw-r--r--fs/btrfs/zoned.h5
13 files changed, 270 insertions, 82 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index aa57bdc8fc89..6d5c4e45cfef 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2442,16 +2442,16 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
if (!--cache->ro) {
- num_bytes = cache->length - cache->reserved -
- cache->pinned - cache->bytes_super -
- cache->zone_unusable - cache->used;
- sinfo->bytes_readonly -= num_bytes;
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes back */
cache->zone_unusable = cache->alloc_offset - cache->used;
sinfo->bytes_zone_unusable += cache->zone_unusable;
sinfo->bytes_readonly -= cache->zone_unusable;
}
+ num_bytes = cache->length - cache->reserved -
+ cache->pinned - cache->bytes_super -
+ cache->zone_unusable - cache->used;
+ sinfo->bytes_readonly -= num_bytes;
list_del_init(&cache->ro_list);
}
spin_unlock(&cache->lock);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 2bea01d23a5b..1346d698463a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -28,6 +28,7 @@
#include "compression.h"
#include "extent_io.h"
#include "extent_map.h"
+#include "zoned.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
*/
inode = cb->inode;
cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
+ btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
cb->start, cb->start + cb->len - 1,
bio->bi_status == BLK_STS_OK);
@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
u64 first_byte = disk_start;
blk_status_t ret;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
+ const bool use_append = btrfs_use_zone_append(inode, disk_start);
+ const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
WARN_ON(!PAGE_ALIGNED(start));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
cb->nr_pages = nr_pages;
bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = REQ_OP_WRITE | write_flags;
+ bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
+ if (use_append) {
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct block_device *bdev;
+
+ em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
+ if (IS_ERR(em)) {
+ kfree(cb);
+ bio_put(bio);
+ return BLK_STS_NOTSUPP;
+ }
+
+ map = em->map_lookup;
+ /* We only support single profile for now */
+ ASSERT(map->num_stripes == 1);
+ bdev = map->stripes[0].dev->bdev;
+
+ bio_set_dev(bio, bdev);
+ free_extent_map(em);
+ }
+
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
kthread_associate_blkcg(blkcg_css);
@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
int submit = 0;
+ int len = 0;
page = compressed_pages[pg_index];
page->mapping = inode->vfs_inode.i_mapping;
@@ -439,9 +465,20 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
0);
+ /*
+ * Page can only be added to bio if the current bio fits in
+ * stripe.
+ */
+ if (!submit) {
+ if (pg_index == 0 && use_append)
+ len = bio_add_zone_append_page(bio, page,
+ PAGE_SIZE, 0);
+ else
+ len = bio_add_page(bio, page, PAGE_SIZE, 0);
+ }
+
page->mapping = NULL;
- if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
- PAGE_SIZE) {
+ if (submit || len < PAGE_SIZE) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -465,11 +502,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
}
bio = btrfs_bio_alloc(first_byte);
- bio->bi_opf = REQ_OP_WRITE | write_flags;
+ bio->bi_opf = bio_op | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT;
+ /*
+ * Use bio_add_page() to ensure the bio has at least one
+ * page.
+ */
bio_add_page(bio, page, PAGE_SIZE, 0);
}
if (bytes_left < PAGE_SIZE) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c9a3036c23bf..8d386a5587ee 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2648,6 +2648,24 @@ static int validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
+ if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+ BTRFS_FSID_SIZE)) {
+ btrfs_err(fs_info,
+ "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
+ fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+ ret = -EINVAL;
+ }
+
+ if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
+ memcmp(fs_info->fs_devices->metadata_uuid,
+ fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+ btrfs_err(fs_info,
+"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
+ fs_info->super_copy->metadata_uuid,
+ fs_info->fs_devices->metadata_uuid);
+ ret = -EINVAL;
+ }
+
if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
@@ -3279,14 +3297,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
disk_super = fs_info->super_copy;
- ASSERT(!memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
- BTRFS_FSID_SIZE));
-
- if (btrfs_fs_incompat(fs_info, METADATA_UUID)) {
- ASSERT(!memcmp(fs_info->fs_devices->metadata_uuid,
- fs_info->super_copy->metadata_uuid,
- BTRFS_FSID_SIZE));
- }
features = btrfs_super_flags(disk_super);
if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f1d15b68994a..3d5c35e4cb76 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1868,7 +1868,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
trace_run_delayed_ref_head(fs_info, head, 0);
btrfs_delayed_ref_unlock(head);
btrfs_put_delayed_ref_head(head);
- return 0;
+ return ret;
}
static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 074a78a202b8..dee2dafbc872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3753,7 +3753,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
/* Note that em_end from extent_map_end() is exclusive */
iosize = min(em_end, end + 1) - cur;
- if (btrfs_use_zone_append(inode, em))
+ if (btrfs_use_zone_append(inode, em->block_start))
opf = REQ_OP_ZONE_APPEND;
free_extent_map(em);
@@ -5196,7 +5196,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
int ret = 0;
- u64 off = start;
+ u64 off;
u64 max = start + len;
u32 flags = 0;
u32 found_type;
@@ -5231,6 +5231,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
goto out_free_ulist;
}
+ /*
+ * We can't initialize that to 'start' as this could miss extents due
+ * to extent item merging
+ */
+ off = 0;
start = round_down(start, btrfs_inode_sectorsize(inode));
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 294602f139ef..441cee7fbb62 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -788,7 +788,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u64 end_byte = bytenr + len;
u64 csum_end;
struct extent_buffer *leaf;
- int ret;
+ int ret = 0;
const u32 csum_size = fs_info->csum_size;
u32 blocksize_bits = fs_info->sectorsize_bits;
@@ -806,6 +806,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -862,7 +863,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, root, path,
path->slots[0], del_nr);
if (ret)
- goto out;
+ break;
if (key.offset == bytenr)
break;
} else if (key.offset < bytenr && csum_end > end_byte) {
@@ -906,8 +907,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ break;
}
+ ret = 0;
key.offset = end_byte - 1;
} else {
@@ -917,12 +919,41 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
}
- ret = 0;
-out:
btrfs_free_path(path);
return ret;
}
+static int find_next_csum_offset(struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 *next_offset)
+{
+ const u32 nritems = btrfs_header_nritems(path->nodes[0]);
+ struct btrfs_key found_key;
+ int slot = path->slots[0] + 1;
+ int ret;
+
+ if (nritems == 0 || slot >= nritems) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ return ret;
+ } else if (ret > 0) {
+ *next_offset = (u64)-1;
+ return 0;
+ }
+ slot = path->slots[0];
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+ found_key.type != BTRFS_EXTENT_CSUM_KEY)
+ *next_offset = (u64)-1;
+ else
+ *next_offset = found_key.offset;
+
+ return 0;
+}
+
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums)
@@ -938,7 +969,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
u64 total_bytes = 0;
u64 csum_offset;
u64 bytenr;
- u32 nritems;
u32 ins_size;
int index = 0;
int found_next;
@@ -981,26 +1011,10 @@ again:
goto insert;
}
} else {
- int slot = path->slots[0] + 1;
- /* we didn't find a csum item, insert one */
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (!nritems || (path->slots[0] >= nritems - 1)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- goto out;
- } else if (ret > 0) {
- found_next = 1;
- goto insert;
- }
- slot = path->slots[0];
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
- if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- found_key.type != BTRFS_EXTENT_CSUM_KEY) {
- found_next = 1;
- goto insert;
- }
- next_offset = found_key.offset;
+ /* We didn't find a csum item, insert one. */
+ ret = find_next_csum_offset(root, path, &next_offset);
+ if (ret < 0)
+ goto out;
found_next = 1;
goto insert;
}
@@ -1056,8 +1070,48 @@ extend_csum:
tmp = sums->len - total_bytes;
tmp >>= fs_info->sectorsize_bits;
WARN_ON(tmp < 1);
+ extend_nr = max_t(int, 1, tmp);
+
+ /*
+ * A log tree can already have checksum items with a subset of
+ * the checksums we are trying to log. This can happen after
+ * doing a sequence of partial writes into prealloc extents and
+ * fsyncs in between, with a full fsync logging a larger subrange
+ * of an extent for which a previous fast fsync logged a smaller
+ * subrange. And this happens in particular due to merging file
+ * extent items when we complete an ordered extent for a range
+ * covered by a prealloc extent - this is done at
+ * btrfs_mark_extent_written().
+ *
+ * So if we try to extend the previous checksum item, which has
+ * a range that ends at the start of the range we want to insert,
+ * make sure we don't extend beyond the start offset of the next
+ * checksum item. If we are at the last item in the leaf, then
+ * forget the optimization of extending and add a new checksum
+ * item - it is not worth the complexity of releasing the path,
+ * getting the first key for the next leaf, repeat the btree
+ * search, etc, because log trees are temporary anyway and it
+ * would only save a few bytes of leaf space.
+ */
+ if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+ if (path->slots[0] + 1 >=
+ btrfs_header_nritems(path->nodes[0])) {
+ ret = find_next_csum_offset(root, path, &next_offset);
+ if (ret < 0)
+ goto out;
+ found_next = 1;
+ goto insert;
+ }
+
+ ret = find_next_csum_offset(root, path, &next_offset);
+ if (ret < 0)
+ goto out;
+
+ tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
+ if (tmp <= INT_MAX)
+ extend_nr = min_t(int, extend_nr, tmp);
+ }
- extend_nr = max_t(int, 1, (int)tmp);
diff = (csum_offset + extend_nr) * csum_size;
diff = min(diff,
MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3b10d98b4ebb..55f68422061d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1094,7 +1094,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_nr = 0;
int del_slot = 0;
int recow;
- int ret;
+ int ret = 0;
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
@@ -1315,7 +1315,7 @@ again:
}
out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eb6fddf40841..46f392943f4d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3000,6 +3000,18 @@ out:
if (ret || truncated) {
u64 unwritten_start = start;
+ /*
+ * If we failed to finish this ordered extent for any reason we
+ * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
+ * extent, and mark the inode with the error if it wasn't
+ * already set. Any error during writeback would have already
+ * set the mapping error, so we need to set it if we're the ones
+ * marking this ordered extent as failed.
+ */
+ if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
+ &ordered_extent->flags))
+ mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
+
if (truncated)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
@@ -3241,6 +3253,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
+ cond_resched_lock(&fs_info->delayed_iput_lock);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
@@ -7785,7 +7798,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
iomap->bdev = fs_info->fs_devices->latest_bdev;
iomap->length = len;
- if (write && btrfs_use_zone_append(BTRFS_I(inode), em))
+ if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
iomap->flags |= IOMAP_F_ZONE_APPEND;
free_extent_map(em);
@@ -9075,6 +9088,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret2;
bool root_log_pinned = false;
bool dest_log_pinned = false;
+ bool need_abort = false;
/* we only allow rename subvolume link between subvolumes */
if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
@@ -9134,6 +9148,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_idx);
if (ret)
goto out_fail;
+ need_abort = true;
}
/* And now for the dest. */
@@ -9149,8 +9164,11 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_ino,
btrfs_ino(BTRFS_I(old_dir)),
new_idx);
- if (ret)
+ if (ret) {
+ if (need_abort)
+ btrfs_abort_transaction(trans, ret);
goto out_fail;
+ }
}
/* Update inode version and ctime/mtime. */
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 3928ecc40d7b..9178da07cc9c 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -203,10 +203,7 @@ static int clone_copy_inline_extent(struct inode *dst,
* inline extent's data to the page.
*/
ASSERT(key.offset > 0);
- ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
- inline_data, size, datal,
- comp_type);
- goto out;
+ goto copy_to_page;
}
} else if (i_size_read(dst) <= datal) {
struct btrfs_file_extent_item *ei;
@@ -222,13 +219,10 @@ static int clone_copy_inline_extent(struct inode *dst,
BTRFS_FILE_EXTENT_INLINE)
goto copy_inline_extent;
- ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
- inline_data, size, datal, comp_type);
- goto out;
+ goto copy_to_page;
}
copy_inline_extent:
- ret = 0;
/*
* We have no extent items, or we have an extent at offset 0 which may
* or may not be inlined. All these cases are dealt the same way.
@@ -240,11 +234,13 @@ copy_inline_extent:
* clone. Deal with all these cases by copying the inline extent
* data into the respective page at the destination inode.
*/
- ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
- inline_data, size, datal, comp_type);
- goto out;
+ goto copy_to_page;
}
+ /*
+ * Release path before starting a new transaction so we don't hold locks
+ * that would confuse lockdep.
+ */
btrfs_release_path(path);
/*
* If we end up here it means were copy the inline extent into a leaf
@@ -301,6 +297,21 @@ out:
*trans_out = trans;
return ret;
+
+copy_to_page:
+ /*
+ * Release our path because we don't need it anymore and also because
+ * copy_inline_to_page() needs to reserve data and metadata, which may
+ * need to flush delalloc when we are low on available space and
+ * therefore cause a deadlock if writeback of an inline extent needs to
+ * write to the same leaf or an ordered extent completion needs to write
+ * to the same leaf.
+ */
+ btrfs_release_path(path);
+
+ ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
+ inline_data, size, datal, comp_type);
+ goto out;
}
/**
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 95a600034d61..dbcf8bb2f3b9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1574,7 +1574,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- btrfs_update_inode(trans, root, BTRFS_I(inode));
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+ if (ret)
+ goto out;
}
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
@@ -1749,7 +1751,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (nlink != inode->i_nlink) {
set_nlink(inode, nlink);
- btrfs_update_inode(trans, root, BTRFS_I(inode));
+ ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+ if (ret)
+ goto out;
}
BTRFS_I(inode)->index_cnt = (u64)-1;
@@ -1787,6 +1791,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
if (ret == 1) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -1799,17 +1804,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ break;
btrfs_release_path(path);
inode = read_one_inode(root, key.offset);
- if (!inode)
- return -EIO;
+ if (!inode) {
+ ret = -EIO;
+ break;
+ }
ret = fixup_inode_link_count(trans, root, inode);
iput(inode);
if (ret)
- goto out;
+ break;
/*
* fixup on a directory may create new entries,
@@ -1818,8 +1825,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
*/
key.offset = (u64)-1;
}
- ret = 0;
-out:
btrfs_release_path(path);
return ret;
}
@@ -1858,8 +1863,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
} else if (ret == -EEXIST) {
ret = 0;
- } else {
- BUG(); /* Logic Error */
}
iput(inode);
@@ -3299,6 +3302,22 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* begins and releases it only after writing its superblock.
*/
mutex_lock(&fs_info->tree_log_mutex);
+
+ /*
+ * The previous transaction writeout phase could have failed, and thus
+ * marked the fs in an error state. We must not commit here, as we
+ * could have updated our generation in the super_for_commit and
+ * writing the super here would result in transid mismatches. If there
+ * is an error here just bail.
+ */
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ ret = -EIO;
+ btrfs_set_log_full_commit(trans);
+ btrfs_abort_transaction(trans, ret);
+ mutex_unlock(&fs_info->tree_log_mutex);
+ goto out_wake_log_root;
+ }
+
btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start);
btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
ret = write_all_supers(fs_info, 1);
@@ -6463,6 +6482,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
(!old_dir || old_dir->logged_trans < trans->transid))
return;
+ /*
+ * If we are doing a rename (old_dir is not NULL) from a directory that
+ * was previously logged, make sure the next log attempt on the directory
+ * is not skipped and logs the inode again. This is because the log may
+ * not currently be authoritative for a range including the old
+ * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make
+ * sure after a log replay we do not end up with both the new and old
+ * dentries around (in case the inode is a directory we would have a
+ * directory with two hard links and 2 inode references for different
+ * parents). The next log attempt of old_dir will happen at
+ * btrfs_log_all_parents(), called through btrfs_log_inode_parent()
+ * below, because we have previously set inode->last_unlink_trans to the
+ * current transaction ID, either here or at btrfs_record_unlink_dir() in
+ * case inode is a directory.
+ */
+ if (old_dir)
+ old_dir->logged_trans = 0;
+
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
ctx.logging_new_name = true;
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9a1ead0c4a31..47d27059d064 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1459,7 +1459,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
/* Given hole range was invalid (outside of device) */
if (ret == -ERANGE) {
*hole_start += *hole_size;
- *hole_size = false;
+ *hole_size = 0;
return true;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 304ce64c70a4..f1f3b10d1dbb 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -150,6 +150,18 @@ static inline u32 sb_zone_number(int shift, int mirror)
return (u32)zone;
}
+static inline sector_t zone_start_sector(u32 zone_number,
+ struct block_device *bdev)
+{
+ return (sector_t)zone_number << ilog2(bdev_zone_sectors(bdev));
+}
+
+static inline u64 zone_start_physical(u32 zone_number,
+ struct btrfs_zoned_device_info *zone_info)
+{
+ return (u64)zone_number << zone_info->zone_size_shift;
+}
+
/*
* Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
* device into static sized chunks and fake a conventional zone on each of
@@ -405,8 +417,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
if (sb_zone + 1 >= zone_info->nr_zones)
continue;
- sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT);
- ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT,
+ ret = btrfs_get_dev_zones(device,
+ zone_start_physical(sb_zone, zone_info),
&zone_info->sb_zones[sb_pos],
&nr_zones);
if (ret)
@@ -721,7 +733,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
if (sb_zone + 1 >= nr_zones)
return -ENOENT;
- ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift,
+ ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev),
BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb,
zones);
if (ret < 0)
@@ -826,7 +838,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
return -ENOENT;
return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sb_zone << zone_sectors_shift,
+ zone_start_sector(sb_zone, bdev),
zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
}
@@ -878,7 +890,8 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
if (!(end <= sb_zone ||
sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) {
have_sb = true;
- pos = ((u64)sb_zone + BTRFS_NR_SB_LOG_ZONES) << shift;
+ pos = zone_start_physical(
+ sb_zone + BTRFS_NR_SB_LOG_ZONES, zinfo);
break;
}
@@ -1278,7 +1291,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
spin_unlock(&trans->releasing_ebs_lock);
}
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
@@ -1293,7 +1306,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
if (!is_data_inode(&inode->vfs_inode))
return false;
- cache = btrfs_lookup_block_group(fs_info, em->block_start);
+ cache = btrfs_lookup_block_group(fs_info, start);
ASSERT(cache);
if (!cache)
return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 5e41a74a9cb2..e55d32595c2c 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
void btrfs_free_redirty_list(struct btrfs_transaction *trans);
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em);
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start);
void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
struct bio *bio);
void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
@@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
-static inline bool btrfs_use_zone_append(struct btrfs_inode *inode,
- struct extent_map *em)
+static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
{
return false;
}