summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c223
1 files changed, 161 insertions, 62 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 42c9899d9241..d322cf82783f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -190,7 +190,7 @@ static void process_one_folio(struct btrfs_fs_info *fs_info,
btrfs_folio_clamp_clear_writeback(fs_info, folio, start, len);
if (folio != locked_folio && (page_ops & PAGE_UNLOCK))
- btrfs_folio_end_writer_lock(fs_info, folio, start, len);
+ btrfs_folio_end_lock(fs_info, folio, start, len);
}
static void __process_folios_contig(struct address_space *mapping,
@@ -276,7 +276,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
range_start = max_t(u64, folio_pos(folio), start);
range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
end + 1) - range_start;
- btrfs_folio_set_writer_lock(fs_info, folio, range_start, range_len);
+ btrfs_folio_set_lock(fs_info, folio, range_start, range_len);
processed_end = range_start + range_len - 1;
}
@@ -438,7 +438,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
if (!btrfs_is_subpage(fs_info, folio->mapping))
folio_unlock(folio);
else
- btrfs_subpage_end_reader(fs_info, folio, start, len);
+ btrfs_folio_end_lock(fs_info, folio, start, len);
}
/*
@@ -495,7 +495,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio)
return;
ASSERT(folio_test_private(folio));
- btrfs_subpage_start_reader(fs_info, folio, folio_pos(folio), PAGE_SIZE);
+ btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE);
}
/*
@@ -526,8 +526,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
u64 end;
u32 len;
- /* For now only order 0 folios are supported for data. */
- ASSERT(folio_order(folio) == 0);
btrfs_debug(fs_info,
"%s: bi_sector=%llu, err=%d, mirror=%u",
__func__, bio->bi_iter.bi_sector, bio->bi_status,
@@ -555,7 +553,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> folio_shift(folio);
/*
* Zero out the remaining part if this range straddles
@@ -564,9 +561,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* Here we should only zero the range inside the folio,
* not touch anything else.
*
- * NOTE: i_size is exclusive while end is inclusive.
+ * NOTE: i_size is exclusive while end is inclusive and
+ * folio_contains() takes PAGE_SIZE units.
*/
- if (folio_index(folio) == end_index && i_size <= end) {
+ if (folio_contains(folio, i_size >> PAGE_SHIFT) &&
+ i_size <= end) {
u32 zero_start = max(offset_in_folio(folio, i_size),
offset_in_folio(folio, start));
u32 zero_len = offset_in_folio(folio, end) + 1 -
@@ -901,12 +900,11 @@ void clear_folio_extent_mapped(struct folio *folio)
folio_detach_private(folio);
}
-static struct extent_map *__get_extent_map(struct inode *inode,
- struct folio *folio, u64 start,
- u64 len, struct extent_map **em_cached)
+static struct extent_map *get_extent_map(struct btrfs_inode *inode,
+ struct folio *folio, u64 start,
+ u64 len, struct extent_map **em_cached)
{
struct extent_map *em;
- struct extent_state *cached_state = NULL;
ASSERT(em_cached);
@@ -922,14 +920,12 @@ static struct extent_map *__get_extent_map(struct inode *inode,
*em_cached = NULL;
}
- btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), start, start + len - 1, &cached_state);
- em = btrfs_get_extent(BTRFS_I(inode), folio, start, len);
+ em = btrfs_get_extent(inode, folio, start, len);
if (!IS_ERR(em)) {
BUG_ON(*em_cached);
refcount_inc(&em->refs);
*em_cached = em;
}
- unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state);
return em;
}
@@ -963,7 +959,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
return ret;
}
- if (folio->index == last_byte >> folio_shift(folio)) {
+ if (folio_contains(folio, last_byte >> PAGE_SHIFT)) {
size_t zero_offset = offset_in_folio(folio, last_byte);
if (zero_offset) {
@@ -985,8 +981,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
end_folio_read(folio, true, cur, iosize);
break;
}
- em = __get_extent_map(inode, folio, cur, end - cur + 1,
- em_cached);
+ em = get_extent_map(BTRFS_I(inode), folio, cur, end - cur + 1, em_cached);
if (IS_ERR(em)) {
end_folio_read(folio, false, cur, end + 1 - cur);
return PTR_ERR(em);
@@ -1087,11 +1082,18 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
int btrfs_read_folio(struct file *file, struct folio *folio)
{
+ struct btrfs_inode *inode = folio_to_inode(folio);
+ const u64 start = folio_pos(folio);
+ const u64 end = start + folio_size(folio) - 1;
+ struct extent_state *cached_state = NULL;
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
struct extent_map *em_cached = NULL;
int ret;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
free_extent_map(em_cached);
/*
@@ -1102,15 +1104,59 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
return ret;
}
+static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bitmap,
+ u64 start, u32 len)
+{
+ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
+ const u64 folio_start = folio_pos(folio);
+ unsigned int start_bit;
+ unsigned int nbits;
+
+ ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE);
+ start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
+ nbits = len >> fs_info->sectorsize_bits;
+ ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits));
+ bitmap_set(delalloc_bitmap, start_bit, nbits);
+}
+
+static bool find_next_delalloc_bitmap(struct folio *folio,
+ unsigned long *delalloc_bitmap, u64 start,
+ u64 *found_start, u32 *found_len)
+{
+ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
+ const u64 folio_start = folio_pos(folio);
+ const unsigned int bitmap_size = fs_info->sectors_per_page;
+ unsigned int start_bit;
+ unsigned int first_zero;
+ unsigned int first_set;
+
+ ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE);
+
+ start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
+ first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit);
+ if (first_set >= bitmap_size)
+ return false;
+
+ *found_start = folio_start + (first_set << fs_info->sectorsize_bits);
+ first_zero = find_next_zero_bit(delalloc_bitmap, bitmap_size, first_set);
+ *found_len = (first_zero - first_set) << fs_info->sectorsize_bits;
+ return true;
+}
+
/*
- * helper for extent_writepage(), doing all of the delayed allocation setup.
+ * Do all of the delayed allocation setup.
+ *
+ * Return >0 if all the dirty blocks are submitted async (compression) or inlined.
+ * The @folio should no longer be touched (treat it as already unlocked).
*
- * This returns 1 if btrfs_run_delalloc_range function did all the work required
- * to write the page (copy into inline extent). In this case the IO has
- * been started and the page is already unlocked.
+ * Return 0 if there is still dirty block that needs to be submitted through
+ * extent_writepage_io().
+ * bio_ctrl->submit_bitmap will indicate which blocks of the folio should be
+ * submitted, and @folio is still kept locked.
*
- * This returns 0 if all went well (page still locked)
- * This returns < 0 if there were errors (page still locked)
+ * Return <0 if there is any error hit.
+ * Any allocated ordered extent range covering this folio will be marked
+ * finished (IOERR), and @folio is still kept locked.
*/
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
struct folio *folio,
@@ -1121,16 +1167,28 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
const u64 page_start = folio_pos(folio);
const u64 page_end = page_start + folio_size(folio) - 1;
+ unsigned long delalloc_bitmap = 0;
/*
* Save the last found delalloc end. As the delalloc end can go beyond
* page boundary, thus we cannot rely on subpage bitmap to locate the
* last delalloc end.
*/
u64 last_delalloc_end = 0;
+ /*
+ * The range end (exclusive) of the last successfully finished delalloc
+ * range.
+ * Any range covered by ordered extent must either be manually marked
+ * finished (error handling), or has IO submitted (and finish the
+ * ordered extent normally).
+ *
+ * This records the end of ordered extent cleanup if we hit an error.
+ */
+ u64 last_finished_delalloc_end = page_start;
u64 delalloc_start = page_start;
u64 delalloc_end = page_end;
u64 delalloc_to_write = 0;
int ret = 0;
+ int bit;
/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
@@ -1140,6 +1198,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
bio_ctrl->submit_bitmap = 1;
}
+ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
+ u64 start = page_start + (bit << fs_info->sectorsize_bits);
+
+ btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize);
+ }
+
/* Lock all (subpage) delalloc ranges inside the folio first. */
while (delalloc_start < page_end) {
delalloc_end = page_end;
@@ -1148,9 +1212,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
delalloc_start = delalloc_end + 1;
continue;
}
- btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
- min(delalloc_end, page_end) + 1 -
- delalloc_start);
+ set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start,
+ min(delalloc_end, page_end) + 1 - delalloc_start);
last_delalloc_end = delalloc_end;
delalloc_start = delalloc_end + 1;
}
@@ -1175,7 +1238,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
found_len = last_delalloc_end + 1 - found_start;
found = true;
} else {
- found = btrfs_subpage_find_writer_locked(fs_info, folio,
+ found = find_next_delalloc_bitmap(folio, &delalloc_bitmap,
delalloc_start, &found_start, &found_len);
}
if (!found)
@@ -1189,11 +1252,19 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
found_len = last_delalloc_end + 1 - found_start;
if (ret >= 0) {
+ /*
+ * Some delalloc range may be created by previous folios.
+ * Thus we still need to clean up this range during error
+ * handling.
+ */
+ last_finished_delalloc_end = found_start;
/* No errors hit so far, run the current delalloc range. */
ret = btrfs_run_delalloc_range(inode, folio,
found_start,
found_start + found_len - 1,
wbc);
+ if (ret >= 0)
+ last_finished_delalloc_end = found_start + found_len;
} else {
/*
* We've hit an error during previous delalloc range,
@@ -1228,8 +1299,22 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
delalloc_start = found_start + found_len;
}
- if (ret < 0)
+ /*
+ * It's possible we had some ordered extents created before we hit
+ * an error, cleanup non-async successfully created delalloc ranges.
+ */
+ if (unlikely(ret < 0)) {
+ unsigned int bitmap_size = min(
+ (last_finished_delalloc_end - page_start) >>
+ fs_info->sectorsize_bits,
+ fs_info->sectors_per_page);
+
+ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size)
+ btrfs_mark_ordered_io_finished(inode, folio,
+ page_start + (bit << fs_info->sectorsize_bits),
+ fs_info->sectorsize, false);
return ret;
+ }
out:
if (last_delalloc_end)
delalloc_end = last_delalloc_end;
@@ -1345,6 +1430,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
+ bool error = false;
const u64 folio_start = folio_pos(folio);
u64 cur;
int bit;
@@ -1387,13 +1473,26 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
break;
}
ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
- if (ret < 0)
- goto out;
+ if (unlikely(ret < 0)) {
+ /*
+ * bio_ctrl may contain a bio crossing several folios.
+ * Submit it immediately so that the bio has a chance
+ * to finish normally, other than marked as error.
+ */
+ submit_one_bio(bio_ctrl);
+ /*
+ * Failed to grab the extent map which should be very rare.
+ * Since there is no bio submitted to finish the ordered
+ * extent, we have to manually finish this sector.
+ */
+ btrfs_mark_ordered_io_finished(inode, folio, cur,
+ fs_info->sectorsize, false);
+ error = true;
+ continue;
+ }
submitted_io = true;
}
- btrfs_folio_assert_not_dirty(fs_info, folio, start, len);
-out:
/*
* If we didn't submitted any sector (>= i_size), folio dirty get
* cleared but PAGECACHE_TAG_DIRTY is not cleared (only cleared
@@ -1401,8 +1500,11 @@ out:
*
* Here we set writeback and clear for the range. If the full folio
* is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag.
+ *
+ * If we hit any error, the corresponding sector will still be dirty
+ * thus no need to clear PAGECACHE_TAG_DIRTY.
*/
- if (!submitted_io) {
+ if (!submitted_io && !error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
@@ -1420,15 +1522,14 @@ out:
*/
static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
{
- struct inode *inode = folio->mapping->host;
- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
- const u64 page_start = folio_pos(folio);
+ struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret;
size_t pg_offset;
- loff_t i_size = i_size_read(inode);
+ loff_t i_size = i_size_read(&inode->vfs_inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
- trace_extent_writepage(folio, inode, bio_ctrl->wbc);
+ trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc);
WARN_ON(!folio_test_locked(folio));
@@ -1452,13 +1553,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
if (ret < 0)
goto done;
- ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
+ ret = writepage_delalloc(inode, folio, bio_ctrl);
if (ret == 1)
return 0;
if (ret)
goto done;
- ret = extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio),
+ ret = extent_writepage_io(inode, folio, folio_pos(folio),
PAGE_SIZE, bio_ctrl, i_size);
if (ret == 1)
return 0;
@@ -1466,17 +1567,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
bio_ctrl->wbc->nr_to_write--;
done:
- if (ret) {
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
- page_start, PAGE_SIZE, !ret);
+ if (ret < 0)
mapping_set_error(folio->mapping, ret);
- }
-
/*
* Only unlock ranges that are submitted. As there can be some async
* submitted ranges inside the folio.
*/
- btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
+ btrfs_folio_end_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
ASSERT(ret <= 0);
return ret;
}
@@ -1773,7 +1870,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags);
spin_unlock(&folio->mapping->i_private_lock);
- bit_start++;
+ bit_start += sectors_per_node;
continue;
}
@@ -2228,12 +2325,9 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
if (ret == 1)
goto next_page;
- if (ret) {
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
- cur, cur_len, !ret);
+ if (ret)
mapping_set_error(mapping, ret);
- }
- btrfs_folio_end_writer_lock(fs_info, folio, cur, cur_len);
+ btrfs_folio_end_lock(fs_info, folio, cur, cur_len);
if (ret < 0)
found_error = true;
next_page:
@@ -2268,12 +2362,20 @@ void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
struct folio *folio;
+ struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
+ const u64 start = readahead_pos(rac);
+ const u64 end = start + readahead_length(rac) - 1;
+ struct extent_state *cached_state = NULL;
struct extent_map *em_cached = NULL;
u64 prev_em_start = (u64)-1;
+ btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state);
+
while ((folio = readahead_folio(rac)) != NULL)
btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+ unlock_extent(&inode->io_tree, start, end, &cached_state);
+
if (em_cached)
free_extent_map(em_cached);
submit_one_bio(&bio_ctrl);
@@ -2452,12 +2554,6 @@ static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *foli
subpage = folio_get_private(folio);
if (atomic_read(&subpage->eb_refs))
return true;
- /*
- * Even there is no eb refs here, we may still have
- * end_folio_read() call relying on page::private.
- */
- if (atomic_read(&subpage->readers))
- return true;
}
return false;
}
@@ -2730,10 +2826,10 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
}
-#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *eb, *exists = NULL;
int ret;
@@ -2769,8 +2865,11 @@ again:
free_eb:
btrfs_release_extent_buffer(eb);
return exists;
-}
+#else
+ /* Stub to avoid linker error when compiled with optimizations turned off. */
+ return NULL;
#endif
+}
static struct extent_buffer *grab_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page)