diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 23:28:14 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 23:28:14 +0300 |
commit | 4f5e249ec0ea8872e1644df23cffffbe28007188 (patch) | |
tree | ecb7066ea436d502889c86ba27c598fc0947d4cf /fs | |
parent | 98f3a9a4fd449641010c77abca16aebb0b8d4419 (diff) | |
parent | 602f09f4029c7b5e1a2f44a7651ac8922a904a1b (diff) | |
download | linux-4f5e249ec0ea8872e1644df23cffffbe28007188.tar.xz |
Merge tag 'vfs-6.11.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner:
"This contains some minor work for the iomap subsystem:
- Add documentation on the design of iomap and how to port to it
- Optimize iomap_read_folio()
- Bring back the change to iomap_write_end() to no increase i_size.
This is accompanied by a change to xfs to reserve blocks for
truncating large realtime inodes to avoid exposing stale data when
iomap_write_end() stops increasing i_size"
* tag 'vfs-6.11.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
iomap: don't increase i_size in iomap_write_end()
xfs: reserve blocks for truncating large realtime inode
Documentation: the design of iomap and how to port
iomap: Optimize iomap_read_folio
Diffstat (limited to 'fs')
-rw-r--r-- | fs/iomap/buffered-io.c | 73 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 15 |
2 files changed, 62 insertions, 26 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 95750fc8c66f..f420c53d86ac 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -442,6 +442,24 @@ done: return pos - orig_pos + plen; } +static loff_t iomap_read_folio_iter(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx) +{ + struct folio *folio = ctx->cur_folio; + size_t offset = offset_in_folio(folio, iter->pos); + loff_t length = min_t(loff_t, folio_size(folio) - offset, + iomap_length(iter)); + loff_t done, ret; + + for (done = 0; done < length; done += ret) { + ret = iomap_readpage_iter(iter, ctx, done); + if (ret <= 0) + return ret; + } + + return done; +} + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) { struct iomap_iter iter = { @@ -457,7 +475,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.processed = iomap_readpage_iter(&iter, &ctx, 0); + iter.processed = iomap_read_folio_iter(&iter, &ctx); if (ctx.bio) { submit_bio(ctx.bio); @@ -872,37 +890,22 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t old_size = iter->inode->i_size; - size_t written; if (srcmap->type == IOMAP_INLINE) { iomap_write_end_inline(iter, folio, pos, copied); - written = copied; - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - written = block_write_end(NULL, iter->inode->i_mapping, pos, - len, copied, &folio->page, NULL); - WARN_ON_ONCE(written != copied && written != 0); - } else { - written = __iomap_write_end(iter->inode, pos, len, copied, - folio) ? copied : 0; + return true; } - /* - * Update the in-memory inode size after copying the data into the page - * cache. It's up to the file system to write the updated size to disk, - * preferably after I/O completion so that no stale data is exposed. - * Only once that's done can we unlock and release the folio. - */ - if (pos + written > old_size) { - i_size_write(iter->inode, pos + written); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, written, folio); + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + size_t bh_written; - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); + bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(bh_written != copied && bh_written != 0); + return bh_written == copied; + } - return written == copied; + return __iomap_write_end(iter->inode, pos, len, copied, folio); } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -917,6 +920,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; + loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -968,6 +972,23 @@ retry: written = iomap_write_end(iter, pos, bytes, copied, folio) ? copied : 0; + /* + * Update the in-memory inode size after copying the data into + * the page cache. It's up to the file system to write the + * updated size to disk, preferably after I/O completion so that + * no stale data is exposed. Only once that's done can we + * unlock and release the folio. + */ + old_size = iter->inode->i_size; + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, written, folio); + + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); + cond_resched(); if (unlikely(written == 0)) { /* @@ -1338,6 +1359,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1403,6 +1425,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ff222827e550..a00dcbc77e12 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,6 +17,8 @@ #include "xfs_da_btree.h" #include "xfs_attr.h" #include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_bmap_btree.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" @@ -811,6 +813,7 @@ xfs_setattr_size( struct xfs_trans *tp; int error; uint lock_flags = 0; + uint resblks = 0; bool did_zeroing = false; xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); @@ -917,7 +920,17 @@ xfs_setattr_size( return error; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + /* + * For realtime inode with more than one block rtextsize, we need the + * block reservation for bmap btree block allocations/splits that can + * happen since it could split the tail written extent and convert the + * right beyond EOF one to unwritten. + */ + if (xfs_inode_has_bigrtalloc(ip)) + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, 0, &tp); if (error) return error; |