summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-15 23:28:14 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-15 23:28:14 +0300
commit: 4f5e249ec0ea8872e1644df23cffffbe28007188 (patch)
tree: ecb7066ea436d502889c86ba27c598fc0947d4cf /fs
parent: 98f3a9a4fd449641010c77abca16aebb0b8d4419 (diff)
parent: 602f09f4029c7b5e1a2f44a7651ac8922a904a1b (diff)
download: linux-4f5e249ec0ea8872e1644df23cffffbe28007188.tar.xz
Merge tag 'vfs-6.11.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner:
"This contains some minor work for the iomap subsystem:
- Add documentation on the design of iomap and how to port to it
- Optimize iomap_read_folio()
- Bring back the change to iomap_write_end() to not increase i_size. This is accompanied by a change to xfs to reserve blocks for truncating large realtime inodes, to avoid exposing stale data when iomap_write_end() stops increasing i_size"
* tag 'vfs-6.11.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
iomap: don't increase i_size in iomap_write_end()
xfs: reserve blocks for truncating large realtime inode
Documentation: the design of iomap and how to port
iomap: Optimize iomap_read_folio
Diffstat (limited to 'fs')
-rw-r--r-- fs/iomap/buffered-io.c 73
-rw-r--r-- fs/xfs/xfs_iops.c 15
2 files changed, 62 insertions, 26 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 95750fc8c66f..f420c53d86ac 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -442,6 +442,24 @@ done:
return pos - orig_pos + plen;
}
+static loff_t iomap_read_folio_iter(const struct iomap_iter *iter,
+ struct iomap_readpage_ctx *ctx)
+{
+ struct folio *folio = ctx->cur_folio;
+ size_t offset = offset_in_folio(folio, iter->pos);
+ loff_t length = min_t(loff_t, folio_size(folio) - offset,
+ iomap_length(iter));
+ loff_t done, ret;
+
+ for (done = 0; done < length; done += ret) {
+ ret = iomap_readpage_iter(iter, ctx, done);
+ if (ret <= 0)
+ return ret;
+ }
+
+ return done;
+}
+
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
{
struct iomap_iter iter = {
@@ -457,7 +475,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
trace_iomap_readpage(iter.inode, 1);
while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_readpage_iter(&iter, &ctx, 0);
+ iter.processed = iomap_read_folio_iter(&iter, &ctx);
if (ctx.bio) {
submit_bio(ctx.bio);
@@ -872,37 +890,22 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t old_size = iter->inode->i_size;
- size_t written;
if (srcmap->type == IOMAP_INLINE) {
iomap_write_end_inline(iter, folio, pos, copied);
- written = copied;
- } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
- written = block_write_end(NULL, iter->inode->i_mapping, pos,
- len, copied, &folio->page, NULL);
- WARN_ON_ONCE(written != copied && written != 0);
- } else {
- written = __iomap_write_end(iter->inode, pos, len, copied,
- folio) ? copied : 0;
+ return true;
}
- /*
- * Update the in-memory inode size after copying the data into the page
- * cache. It's up to the file system to write the updated size to disk,
- * preferably after I/O completion so that no stale data is exposed.
- * Only once that's done can we unlock and release the folio.
- */
- if (pos + written > old_size) {
- i_size_write(iter->inode, pos + written);
- iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
- }
- __iomap_put_folio(iter, pos, written, folio);
+ if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
+ size_t bh_written;
- if (old_size < pos)
- pagecache_isize_extended(iter->inode, old_size, pos);
+ bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
+ len, copied, &folio->page, NULL);
+ WARN_ON_ONCE(bh_written != copied && bh_written != 0);
+ return bh_written == copied;
+ }
- return written == copied;
+ return __iomap_write_end(iter->inode, pos, len, copied, folio);
}
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
@@ -917,6 +920,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
do {
struct folio *folio;
+ loff_t old_size;
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
@@ -968,6 +972,23 @@ retry:
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
copied : 0;
+ /*
+ * Update the in-memory inode size after copying the data into
+ * the page cache. It's up to the file system to write the
+ * updated size to disk, preferably after I/O completion so that
+ * no stale data is exposed. Only once that's done can we
+ * unlock and release the folio.
+ */
+ old_size = iter->inode->i_size;
+ if (pos + written > old_size) {
+ i_size_write(iter->inode, pos + written);
+ iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
+ }
+ __iomap_put_folio(iter, pos, written, folio);
+
+ if (old_size < pos)
+ pagecache_isize_extended(iter->inode, old_size, pos);
+
cond_resched();
if (unlikely(written == 0)) {
/*
@@ -1338,6 +1359,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
bytes = folio_size(folio) - offset;
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
@@ -1403,6 +1425,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_mark_accessed(folio);
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
+ __iomap_put_folio(iter, pos, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff222827e550..a00dcbc77e12 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,8 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -811,6 +813,7 @@ xfs_setattr_size(
struct xfs_trans *tp;
int error;
uint lock_flags = 0;
+ uint resblks = 0;
bool did_zeroing = false;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
@@ -917,7 +920,17 @@ xfs_setattr_size(
return error;
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ /*
+ * For a realtime inode whose rtextsize is larger than one block, we need
+ * a block reservation for the bmap btree block allocations/splits that
+ * can happen, because the truncate may split the tail written extent and
+ * convert the part that lies beyond EOF to unwritten.
+ */
+ if (xfs_inode_has_bigrtalloc(ip))
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, 0, &tp);
if (error)
return error;