summaryrefslogtreecommitdiff
path: root/fs/iomap.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2018-12-04 19:38:05 +0300
committerJens Axboe <axboe@kernel.dk>2018-12-04 19:38:05 +0300
commit89d04ec3491bea6d69943b5aed62fdebfad49638 (patch)
tree2388242bc908a4bca33878e109b6f0828069c679 /fs/iomap.c
parent8c2def893afc60d88160d524acf345765cf0c447 (diff)
parent2595646791c319cadfdbf271563aac97d0843dc7 (diff)
downloadlinux-89d04ec3491bea6d69943b5aed62fdebfad49638.tar.xz
Merge tag 'v4.20-rc5' into for-4.21/block
Pull in v4.20-rc5, solving a conflict we'll otherwise get in aio.c and also getting the merge fix that went into mainline that users are hitting testing for-4.21/block and/or for-next. * tag 'v4.20-rc5': (664 commits) Linux 4.20-rc5 PCI: Fix incorrect value returned from pcie_get_speed_cap() MAINTAINERS: Update linux-mips mailing list address ocfs2: fix potential use after free mm/khugepaged: fix the xas_create_range() error path mm/khugepaged: collapse_shmem() do not crash on Compound mm/khugepaged: collapse_shmem() without freezing new_page mm/khugepaged: minor reorderings in collapse_shmem() mm/khugepaged: collapse_shmem() remember to clear holes mm/khugepaged: fix crashes due to misaccounted holes mm/khugepaged: collapse_shmem() stop if punched or truncated mm/huge_memory: fix lockdep complaint on 32-bit i_size_read() mm/huge_memory: splitting set mapping+index before unfreeze mm/huge_memory: rename freeze_page() to unmap_page() initramfs: clean old path before creating a hardlink kernel/kcov.c: mark funcs in __sanitizer_cov_trace_pc() as notrace psi: make disabling/enabling easier for vendor kernels proc: fixup map_files test on arm debugobjects: avoid recursive calls with kmemleak userfaultfd: shmem: UFFDIO_COPY: set the page dirty if VM_WRITE is not set ...
Diffstat (limited to 'fs/iomap.c')
-rw-r--r--fs/iomap.c53
1 files changed, 41 insertions, 12 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 74c1f37f0fd6..d094e5688bd3 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -142,13 +142,14 @@ static void
iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
{
+ loff_t orig_pos = *pos;
+ loff_t isize = i_size_read(inode);
unsigned block_bits = inode->i_blkbits;
unsigned block_size = (1 << block_bits);
unsigned poff = offset_in_page(*pos);
unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
unsigned first = poff >> block_bits;
unsigned last = (poff + plen - 1) >> block_bits;
- unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
/*
* If the block size is smaller than the page size we need to check the
@@ -183,8 +184,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
* handle both halves separately so that we properly zero data in the
* page cache for blocks that are entirely outside of i_size.
*/
- if (first <= end && last > end)
- plen -= (last - end) * block_size;
+ if (orig_pos <= isize && orig_pos + length > isize) {
+ unsigned end = offset_in_page(isize - 1) >> block_bits;
+
+ if (first <= end && last > end)
+ plen -= (last - end) * block_size;
+ }
*offp = poff;
*lenp = plen;
@@ -1584,7 +1589,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
struct bio *bio;
bool need_zeroout = false;
bool use_fua = false;
- int nr_pages, ret;
+ int nr_pages, ret = 0;
size_t copied = 0;
if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1600,12 +1605,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
if (iomap->flags & IOMAP_F_NEW) {
need_zeroout = true;
- } else {
+ } else if (iomap->type == IOMAP_MAPPED) {
/*
- * Use a FUA write if we need datasync semantics, this
- * is a pure data IO that doesn't require any metadata
- * updates and the underlying device supports FUA. This
- * allows us to avoid cache flushes on IO completion.
+ * Use a FUA write if we need datasync semantics, this is a pure
+ * data IO that doesn't require any metadata updates (including
+ * after IO completion such as unwritten extent conversion) and
+ * the underlying device supports FUA. This allows us to avoid
+ * cache flushes on IO completion.
*/
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
(dio->flags & IOMAP_DIO_WRITE_FUA) &&
@@ -1648,8 +1654,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
ret = bio_iov_iter_get_pages(bio, &iter);
if (unlikely(ret)) {
+ /*
+ * We have to stop part way through an IO. We must fall
+ * through to the sub-block tail zeroing here, otherwise
+ * this short IO may expose stale data in the tail of
+ * the block we haven't written data to.
+ */
bio_put(bio);
- return copied ? copied : ret;
+ goto zero_tail;
}
n = bio->bi_iter.bi_size;
@@ -1683,13 +1695,21 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
dio->submit.cookie = submit_bio(bio);
} while (nr_pages);
- if (need_zeroout) {
+ /*
+ * We need to zeroout the tail of a sub-block write if the extent type
+ * requires zeroing or the write extends beyond EOF. If we don't zero
+ * the block tail in the latter case, we can expose stale data via mmap
+ * reads of the EOF block.
+ */
+zero_tail:
+ if (need_zeroout ||
+ ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
/* zero out from the end of the write to the end of the block */
pad = pos & (fs_block_size - 1);
if (pad)
iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
}
- return copied;
+ return copied ? copied : ret;
}
static loff_t
@@ -1864,6 +1884,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->wait_for_completion = true;
ret = 0;
}
+
+ /*
+ * Splicing to pipes can fail on a full pipe. We have to
+ * swallow this to make it look like a short IO
+ * otherwise the higher splice layers will completely
+ * mishandle the error and stop moving data.
+ */
+ if (ret == -EFAULT)
+ ret = 0;
break;
}
pos += ret;