diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 8 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 49 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 26 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 108 | ||||
-rw-r--r-- | fs/btrfs/file.c | 4 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 22 | ||||
-rw-r--r-- | fs/btrfs/reflink.c | 33 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 57 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 2 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 27 | ||||
-rw-r--r-- | fs/btrfs/zoned.h | 5 |
13 files changed, 270 insertions, 82 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index aa57bdc8fc89..6d5c4e45cfef 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2442,16 +2442,16 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) spin_lock(&sinfo->lock); spin_lock(&cache->lock); if (!--cache->ro) { - num_bytes = cache->length - cache->reserved - - cache->pinned - cache->bytes_super - - cache->zone_unusable - cache->used; - sinfo->bytes_readonly -= num_bytes; if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = cache->alloc_offset - cache->used; sinfo->bytes_zone_unusable += cache->zone_unusable; sinfo->bytes_readonly -= cache->zone_unusable; } + num_bytes = cache->length - cache->reserved - + cache->pinned - cache->bytes_super - + cache->zone_unusable - cache->used; + sinfo->bytes_readonly -= num_bytes; list_del_init(&cache->ro_list); } spin_unlock(&cache->lock); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2bea01d23a5b..1346d698463a 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -28,6 +28,7 @@ #include "compression.h" #include "extent_io.h" #include "extent_map.h" +#include "zoned.h" static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; @@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio) */ inode = cb->inode; cb->compressed_pages[0]->mapping = cb->inode->i_mapping; + btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0], cb->start, cb->start + cb->len - 1, bio->bi_status == BLK_STS_OK); @@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, u64 first_byte = disk_start; blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; + const bool use_append = btrfs_use_zone_append(inode, disk_start); + const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; WARN_ON(!PAGE_ALIGNED(start)); cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); @@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->nr_pages = nr_pages; bio = btrfs_bio_alloc(first_byte); - bio->bi_opf = REQ_OP_WRITE | write_flags; + bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; + if (use_append) { + struct extent_map *em; + struct map_lookup *map; + struct block_device *bdev; + + em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE); + if (IS_ERR(em)) { + kfree(cb); + bio_put(bio); + return BLK_STS_NOTSUPP; + } + + map = em->map_lookup; + /* We only support single profile for now */ + ASSERT(map->num_stripes == 1); + bdev = map->stripes[0].dev->bdev; + + bio_set_dev(bio, bdev); + free_extent_map(em); + } + if (blkcg_css) { bio->bi_opf |= REQ_CGROUP_PUNT; kthread_associate_blkcg(blkcg_css); @@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bytes_left = compressed_len; for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { int submit = 0; + int len = 0; page = compressed_pages[pg_index]; page->mapping = inode->vfs_inode.i_mapping; @@ -439,9 +465,20 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, 0); + /* + * Page can only be added to bio if the current bio fits in + * stripe. + */ + if (!submit) { + if (pg_index == 0 && use_append) + len = bio_add_zone_append_page(bio, page, + PAGE_SIZE, 0); + else + len = bio_add_page(bio, page, PAGE_SIZE, 0); + } + page->mapping = NULL; - if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (submit || len < PAGE_SIZE) { /* * inc the count before we submit the bio so * we know the end IO handler won't happen before @@ -465,11 +502,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, } bio = btrfs_bio_alloc(first_byte); - bio->bi_opf = REQ_OP_WRITE | write_flags; + bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; if (blkcg_css) bio->bi_opf |= REQ_CGROUP_PUNT; + /* + * Use bio_add_page() to ensure the bio has at least one + * page. + */ bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c9a3036c23bf..8d386a5587ee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2648,6 +2648,24 @@ static int validate_super(struct btrfs_fs_info *fs_info, ret = -EINVAL; } + if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid, + BTRFS_FSID_SIZE)) { + btrfs_err(fs_info, + "superblock fsid doesn't match fsid of fs_devices: %pU != %pU", + fs_info->super_copy->fsid, fs_info->fs_devices->fsid); + ret = -EINVAL; + } + + if (btrfs_fs_incompat(fs_info, METADATA_UUID) && + memcmp(fs_info->fs_devices->metadata_uuid, + fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) { + btrfs_err(fs_info, +"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU", + fs_info->super_copy->metadata_uuid, + fs_info->fs_devices->metadata_uuid); + ret = -EINVAL; + } + if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, @@ -3279,14 +3297,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device disk_super = fs_info->super_copy; - ASSERT(!memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid, - BTRFS_FSID_SIZE)); - - if (btrfs_fs_incompat(fs_info, METADATA_UUID)) { - ASSERT(!memcmp(fs_info->fs_devices->metadata_uuid, - fs_info->super_copy->metadata_uuid, - BTRFS_FSID_SIZE)); - } features = btrfs_super_flags(disk_super); if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f1d15b68994a..3d5c35e4cb76 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1868,7 +1868,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); btrfs_delayed_ref_unlock(head); btrfs_put_delayed_ref_head(head); - return 0; + return ret; } static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head( diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 074a78a202b8..dee2dafbc872 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3753,7 +3753,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, /* Note that em_end from extent_map_end() is exclusive */ iosize = min(em_end, end + 1) - cur; - if (btrfs_use_zone_append(inode, em)) + if (btrfs_use_zone_append(inode, em->block_start)) opf = REQ_OP_ZONE_APPEND; free_extent_map(em); @@ -5196,7 +5196,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int ret = 0; - u64 off = start; + u64 off; u64 max = start + len; u32 flags = 0; u32 found_type; @@ -5231,6 +5231,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out_free_ulist; } + /* + * We can't initialize that to 'start' as this could miss extents due + * to extent item merging + */ + off = 0; start = round_down(start, btrfs_inode_sectorsize(inode)); len = round_up(max, btrfs_inode_sectorsize(inode)) - start; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 294602f139ef..441cee7fbb62 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -788,7 +788,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 end_byte = bytenr + len; u64 csum_end; struct extent_buffer *leaf; - int ret; + int ret = 0; const u32 csum_size = fs_info->csum_size; u32 blocksize_bits = fs_info->sectorsize_bits; @@ -806,6 +806,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -862,7 +863,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = btrfs_del_items(trans, root, path, path->slots[0], del_nr); if (ret) - goto out; + break; if (key.offset == bytenr) break; } else if (key.offset < bytenr && csum_end > end_byte) { @@ -906,8 +907,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, ret = btrfs_split_item(trans, root, path, &key, offset); if (ret && ret != -EAGAIN) { btrfs_abort_transaction(trans, ret); - goto out; + break; } + ret = 0; key.offset = end_byte - 1; } else { @@ -917,12 +919,41 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, } btrfs_release_path(path); } - ret = 0; -out: btrfs_free_path(path); return ret; } +static int find_next_csum_offset(struct btrfs_root *root, + struct btrfs_path *path, + u64 *next_offset) +{ + const u32 nritems = btrfs_header_nritems(path->nodes[0]); + struct btrfs_key found_key; + int slot = path->slots[0] + 1; + int ret; + + if (nritems == 0 || slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + return ret; + } else if (ret > 0) { + *next_offset = (u64)-1; + return 0; + } + slot = path->slots[0]; + } + + btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); + + if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || + found_key.type != BTRFS_EXTENT_CSUM_KEY) + *next_offset = (u64)-1; + else + *next_offset = found_key.offset; + + return 0; +} + int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums) @@ -938,7 +969,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, u64 total_bytes = 0; u64 csum_offset; u64 bytenr; - u32 nritems; u32 ins_size; int index = 0; int found_next; @@ -981,26 +1011,10 @@ again: goto insert; } } else { - int slot = path->slots[0] + 1; - /* we didn't find a csum item, insert one */ - nritems = btrfs_header_nritems(path->nodes[0]); - if (!nritems || (path->slots[0] >= nritems - 1)) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - found_next = 1; - goto insert; - } - slot = path->slots[0]; - } - btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); - if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || - found_key.type != BTRFS_EXTENT_CSUM_KEY) { - found_next = 1; - goto insert; - } - next_offset = found_key.offset; + /* We didn't find a csum item, insert one. */ + ret = find_next_csum_offset(root, path, &next_offset); + if (ret < 0) + goto out; found_next = 1; goto insert; } @@ -1056,8 +1070,48 @@ extend_csum: tmp = sums->len - total_bytes; tmp >>= fs_info->sectorsize_bits; WARN_ON(tmp < 1); + extend_nr = max_t(int, 1, tmp); + + /* + * A log tree can already have checksum items with a subset of + * the checksums we are trying to log. This can happen after + * doing a sequence of partial writes into prealloc extents and + * fsyncs in between, with a full fsync logging a larger subrange + * of an extent for which a previous fast fsync logged a smaller + * subrange. And this happens in particular due to merging file + * extent items when we complete an ordered extent for a range + * covered by a prealloc extent - this is done at + * btrfs_mark_extent_written(). + * + * So if we try to extend the previous checksum item, which has + * a range that ends at the start of the range we want to insert, + * make sure we don't extend beyond the start offset of the next + * checksum item. If we are at the last item in the leaf, then + * forget the optimization of extending and add a new checksum + * item - it is not worth the complexity of releasing the path, + * getting the first key for the next leaf, repeat the btree + * search, etc, because log trees are temporary anyway and it + * would only save a few bytes of leaf space. + */ + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { + if (path->slots[0] + 1 >= + btrfs_header_nritems(path->nodes[0])) { + ret = find_next_csum_offset(root, path, &next_offset); + if (ret < 0) + goto out; + found_next = 1; + goto insert; + } + + ret = find_next_csum_offset(root, path, &next_offset); + if (ret < 0) + goto out; + + tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits; + if (tmp <= INT_MAX) + extend_nr = min_t(int, extend_nr, tmp); + } - extend_nr = max_t(int, 1, (int)tmp); diff = (csum_offset + extend_nr) * csum_size; diff = min(diff, MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3b10d98b4ebb..55f68422061d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1094,7 +1094,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int del_nr = 0; int del_slot = 0; int recow; - int ret; + int ret = 0; u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); @@ -1315,7 +1315,7 @@ again: } out: btrfs_free_path(path); - return 0; + return ret; } /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eb6fddf40841..46f392943f4d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3000,6 +3000,18 @@ out: if (ret || truncated) { u64 unwritten_start = start; + /* + * If we failed to finish this ordered extent for any reason we + * need to make sure BTRFS_ORDERED_IOERR is set on the ordered + * extent, and mark the inode with the error if it wasn't + * already set. Any error during writeback would have already + * set the mapping error, so we need to set it if we're the ones + * marking this ordered extent as failed. + */ + if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR, + &ordered_extent->flags)) + mapping_set_error(ordered_extent->inode->i_mapping, -EIO); + if (truncated) unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); @@ -3241,6 +3253,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); run_delayed_iput_locked(fs_info, inode); + cond_resched_lock(&fs_info->delayed_iput_lock); } spin_unlock(&fs_info->delayed_iput_lock); } @@ -7785,7 +7798,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, iomap->bdev = fs_info->fs_devices->latest_bdev; iomap->length = len; - if (write && btrfs_use_zone_append(BTRFS_I(inode), em)) + if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start)) iomap->flags |= IOMAP_F_ZONE_APPEND; free_extent_map(em); @@ -9075,6 +9088,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret2; bool root_log_pinned = false; bool dest_log_pinned = false; + bool need_abort = false; /* we only allow rename subvolume link between subvolumes */ if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) @@ -9134,6 +9148,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_idx); if (ret) goto out_fail; + need_abort = true; } /* And now for the dest. */ @@ -9149,8 +9164,11 @@ static int btrfs_rename_exchange(struct inode *old_dir, new_ino, btrfs_ino(BTRFS_I(old_dir)), new_idx); - if (ret) + if (ret) { + if (need_abort) + btrfs_abort_transaction(trans, ret); goto out_fail; + } } /* Update inode version and ctime/mtime. */ diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 3928ecc40d7b..9178da07cc9c 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -203,10 +203,7 @@ static int clone_copy_inline_extent(struct inode *dst, * inline extent's data to the page. */ ASSERT(key.offset > 0); - ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, - inline_data, size, datal, - comp_type); - goto out; + goto copy_to_page; } } else if (i_size_read(dst) <= datal) { struct btrfs_file_extent_item *ei; @@ -222,13 +219,10 @@ static int clone_copy_inline_extent(struct inode *dst, BTRFS_FILE_EXTENT_INLINE) goto copy_inline_extent; - ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, - inline_data, size, datal, comp_type); - goto out; + goto copy_to_page; } copy_inline_extent: - ret = 0; /* * We have no extent items, or we have an extent at offset 0 which may * or may not be inlined. All these cases are dealt the same way. @@ -240,11 +234,13 @@ copy_inline_extent: * clone. Deal with all these cases by copying the inline extent * data into the respective page at the destination inode. */ - ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, - inline_data, size, datal, comp_type); - goto out; + goto copy_to_page; } + /* + * Release path before starting a new transaction so we don't hold locks + * that would confuse lockdep. + */ btrfs_release_path(path); /* * If we end up here it means were copy the inline extent into a leaf @@ -301,6 +297,21 @@ out: *trans_out = trans; return ret; + +copy_to_page: + /* + * Release our path because we don't need it anymore and also because + * copy_inline_to_page() needs to reserve data and metadata, which may + * need to flush delalloc when we are low on available space and + * therefore cause a deadlock if writeback of an inline extent needs to + * write to the same leaf or an ordered extent completion needs to write + * to the same leaf. + */ + btrfs_release_path(path); + + ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset, + inline_data, size, datal, comp_type); + goto out; } /** diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 95a600034d61..dbcf8bb2f3b9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1574,7 +1574,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (ret) goto out; - btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + if (ret) + goto out; } ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; @@ -1749,7 +1751,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (nlink != inode->i_nlink) { set_nlink(inode, nlink); - btrfs_update_inode(trans, root, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); + if (ret) + goto out; } BTRFS_I(inode)->index_cnt = (u64)-1; @@ -1787,6 +1791,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; if (ret == 1) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -1799,17 +1804,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); inode = read_one_inode(root, key.offset); - if (!inode) - return -EIO; + if (!inode) { + ret = -EIO; + break; + } ret = fixup_inode_link_count(trans, root, inode); iput(inode); if (ret) - goto out; + break; /* * fixup on a directory may create new entries, @@ -1818,8 +1825,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, */ key.offset = (u64)-1; } - ret = 0; -out: btrfs_release_path(path); return ret; } @@ -1858,8 +1863,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); } else if (ret == -EEXIST) { ret = 0; - } else { - BUG(); /* Logic Error */ } iput(inode); @@ -3299,6 +3302,22 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * begins and releases it only after writing its superblock. */ mutex_lock(&fs_info->tree_log_mutex); + + /* + * The previous transaction writeout phase could have failed, and thus + * marked the fs in an error state. We must not commit here, as we + * could have updated our generation in the super_for_commit and + * writing the super here would result in transid mismatches. If there + * is an error here just bail. + */ + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + ret = -EIO; + btrfs_set_log_full_commit(trans); + btrfs_abort_transaction(trans, ret); + mutex_unlock(&fs_info->tree_log_mutex); + goto out_wake_log_root; + } + btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start); btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level); ret = write_all_supers(fs_info, 1); @@ -6463,6 +6482,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, (!old_dir || old_dir->logged_trans < trans->transid)) return; + /* + * If we are doing a rename (old_dir is not NULL) from a directory that + * was previously logged, make sure the next log attempt on the directory + * is not skipped and logs the inode again. This is because the log may + * not currently be authoritative for a range including the old + * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make + * sure after a log replay we do not end up with both the new and old + * dentries around (in case the inode is a directory we would have a + * directory with two hard links and 2 inode references for different + * parents). The next log attempt of old_dir will happen at + * btrfs_log_all_parents(), called through btrfs_log_inode_parent() + * below, because we have previously set inode->last_unlink_trans to the + * current transaction ID, either here or at btrfs_record_unlink_dir() in + * case inode is a directory. + */ + if (old_dir) + old_dir->logged_trans = 0; + btrfs_init_log_ctx(&ctx, &inode->vfs_inode); ctx.logging_new_name = true; /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9a1ead0c4a31..47d27059d064 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1459,7 +1459,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device, /* Given hole range was invalid (outside of device) */ if (ret == -ERANGE) { *hole_start += *hole_size; - *hole_size = false; + *hole_size = 0; return true; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 304ce64c70a4..f1f3b10d1dbb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -150,6 +150,18 @@ static inline u32 sb_zone_number(int shift, int mirror) return (u32)zone; } +static inline sector_t zone_start_sector(u32 zone_number, + struct block_device *bdev) +{ + return (sector_t)zone_number << ilog2(bdev_zone_sectors(bdev)); +} + +static inline u64 zone_start_physical(u32 zone_number, + struct btrfs_zoned_device_info *zone_info) +{ + return (u64)zone_number << zone_info->zone_size_shift; +} + /* * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block * device into static sized chunks and fake a conventional zone on each of @@ -405,8 +417,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) if (sb_zone + 1 >= zone_info->nr_zones) continue; - sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT); - ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, + ret = btrfs_get_dev_zones(device, + zone_start_physical(sb_zone, zone_info), &zone_info->sb_zones[sb_pos], &nr_zones); if (ret) @@ -721,7 +733,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, if (sb_zone + 1 >= nr_zones) return -ENOENT; - ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift, + ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev), BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb, zones); if (ret < 0) @@ -826,7 +838,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) return -ENOENT; return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sb_zone << zone_sectors_shift, + zone_start_sector(sb_zone, bdev), zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS); } @@ -878,7 +890,8 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, if (!(end <= sb_zone || sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) { have_sb = true; - pos = ((u64)sb_zone + BTRFS_NR_SB_LOG_ZONES) << shift; + pos = zone_start_physical( + sb_zone + BTRFS_NR_SB_LOG_ZONES, zinfo); break; } @@ -1278,7 +1291,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans) spin_unlock(&trans->releasing_ebs_lock); } -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_group *cache; @@ -1293,7 +1306,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) if (!is_data_inode(&inode->vfs_inode)) return false; - cache = btrfs_lookup_block_group(fs_info, em->block_start); + cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) return false; diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 5e41a74a9cb2..e55d32595c2c 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache); void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb); void btrfs_free_redirty_list(struct btrfs_transaction *trans); -bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em); +bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start); void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, struct bio *bio); void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered); @@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb) { } static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { } -static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, - struct extent_map *em) +static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) { return false; } |