From 19e0c58f6552638c86395f0717210326fdf14fd2 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: generic inline data handling

Add generic inline data handling by adding a pointer to the inline data
region to struct iomap.  When handling a buffered IOMAP_INLINE write,
iomap_write_begin will copy the current inline data from the inline data
region into the page cache, and iomap_write_end will copy the changes in
the page cache back to the inline data region.

This doesn't cover inline data reads and direct I/O yet because so far,
we have no users.

Signed-off-by: Andreas Gruenbacher
[hch: small cleanups to better fit in with other iomap work]
Signed-off-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a044a824da85..10d6cff7f69a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -55,6 +55,7 @@ struct iomap {
         u16                     flags;  /* flags for mapping */
         struct block_device     *bdev;  /* block device for I/O */
         struct dax_device       *dax_dev; /* dax_dev for dax operations */
+        void                    *inline_data;
 };
 
 /*
--
cgit v1.2.3


From 63899c6f8851c32214b19390254fa1ae90b582df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: add a page_done callback

This will be used by gfs2 to attach data to transactions for the
journaled data mode.  But the concept is generic enough that we might be
able to use it for other purposes like encryption/integrity
post-processing in the future.

Based on a patch from Andreas Gruenbacher.

Signed-off-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 fs/iomap.c            | 3 +++
 include/linux/iomap.h | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4aecd7c5dbd8..a1f71e64ea49 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -201,6 +201,9 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
                                 copied, page, NULL);
         }
 
+        if (iomap->page_done)
+                iomap->page_done(inode, pos, copied, page, iomap);
+
         if (ret < len)
                 iomap_write_failed(inode, pos, len);
         return ret;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 10d6cff7f69a..45f43865b0f0 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@ struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;
 
@@ -56,6 +57,14 @@ struct iomap {
         struct block_device     *bdev;  /* block device for I/O */
         struct dax_device       *dax_dev; /* dax_dev for dax operations */
         void                    *inline_data;
+
+        /*
+         * Called when finished processing a page in the mapping returned in
+         * this iomap.  At least for now this is only supported in the
+         * buffered write path.
+         */
+        void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+                        struct page *page, struct iomap *iomap);
 };
 
 /*
--
cgit v1.2.3
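For context, both new fields above are meant to be filled in by a filesystem's ->iomap_begin callback. The sketch below is illustrative only and not part of the series; "examplefs", EXAMPLEFS_I() and its inline_buf/inline_data_len fields are invented for the example, while the struct iomap members and the page_done signature come from the patches above.

/* Hypothetical sketch -- examplefs and its helpers do not exist. */
static void examplefs_page_done(struct inode *inode, loff_t pos,
                unsigned copied, struct page *page, struct iomap *iomap)
{
        /* e.g. attach the page to a running transaction, as gfs2 plans to. */
}

static int examplefs_iomap_begin(struct inode *inode, loff_t pos,
                loff_t length, unsigned flags, struct iomap *iomap)
{
        struct examplefs_inode *ei = EXAMPLEFS_I(inode);        /* hypothetical */

        if (ei->inline_data_len) {                               /* hypothetical */
                iomap->type = IOMAP_INLINE;
                iomap->offset = 0;
                iomap->length = i_size_read(inode);
                /* iomap_write_begin/_end copy to and from this buffer. */
                iomap->inline_data = ei->inline_buf;             /* hypothetical */
        } else {
                /* ... map blocks as usual ... */
        }

        /* Invoked from iomap_write_end once a page has been processed. */
        iomap->page_done = examplefs_page_done;
        return 0;
}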
From e184fde6f3f5353040dd4b5637f823fc87436e55 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add private pointer to struct iomap

Add a private pointer to struct iomap to allow filesystems to pass data
from iomap_begin to iomap_end.  Will be used by gfs2 for passing on the
on-disk inode buffer head.

Signed-off-by: Andreas Gruenbacher
Signed-off-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 45f43865b0f0..1f36523d448a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -57,6 +57,7 @@ struct iomap {
         struct block_device     *bdev;  /* block device for I/O */
         struct dax_device       *dax_dev; /* dax_dev for dax operations */
         void                    *inline_data;
+        void                    *private; /* filesystem private */
 
         /*
          * Called when finished processing a page in the mapping returned in
--
cgit v1.2.3
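The intended usage pattern is to stash filesystem state in ->iomap_begin and release it in ->iomap_end. A minimal hypothetical sketch (the examplefs names and examplefs_meta_read() helper are invented; gfs2 will use the same pattern for its on-disk inode buffer head):

/* Hypothetical sketch -- examplefs and examplefs_meta_read() are invented. */
static int examplefs_iomap_begin(struct inode *inode, loff_t pos,
                loff_t length, unsigned flags, struct iomap *iomap)
{
        struct buffer_head *bh;
        int error;

        error = examplefs_meta_read(inode, &bh);
        if (error)
                return error;

        /* ... fill in the mapping from the metadata held in bh ... */

        /* Hand the buffer head on to ->iomap_end via the new pointer. */
        iomap->private = bh;
        return 0;
}

static int examplefs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                ssize_t written, unsigned flags, struct iomap *iomap)
{
        brelse(iomap->private);
        return 0;
}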
From 72b4daa241295440f98e80ae21294a67b27ca091 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add an iomap-based readpage and readpages implementation

Simply use iomap_apply to iterate over the file and submit a bio for
each non-uptodate but mapped region and zero everything else.  Note that
as-is this can not be used for file systems with a blocksize smaller
than the page size, but that support will be added later.

Signed-off-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 fs/iomap.c            | 214 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/iomap.h |   3 +
 2 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index a1f71e64ea49..4f10c6b1cf6d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -123,6 +124,217 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
         SetPageUptodate(page);
 }
 
+static void
+iomap_read_end_io(struct bio *bio)
+{
+        int error = blk_status_to_errno(bio->bi_status);
+        struct bio_vec *bvec;
+        int i;
+
+        bio_for_each_segment_all(bvec, bio, i)
+                page_endio(bvec->bv_page, false, error);
+        bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+        struct page             *cur_page;
+        bool                    cur_page_in_bio;
+        bool                    is_readahead;
+        struct bio              *bio;
+        struct list_head        *pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+                struct iomap *iomap)
+{
+        struct iomap_readpage_ctx *ctx = data;
+        struct page *page = ctx->cur_page;
+        unsigned poff = pos & (PAGE_SIZE - 1);
+        unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+        bool is_contig = false;
+        sector_t sector;
+
+        /* we don't support blocksize < PAGE_SIZE quite yet. */
+        WARN_ON_ONCE(pos != page_offset(page));
+        WARN_ON_ONCE(plen != PAGE_SIZE);
+
+        if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+                zero_user(page, poff, plen);
+                SetPageUptodate(page);
+                goto done;
+        }
+
+        ctx->cur_page_in_bio = true;
+
+        /*
+         * Try to merge into a previous segment if we can.
+         */
+        sector = iomap_sector(iomap, pos);
+        if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+                if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+                        goto done;
+                is_contig = true;
+        }
+
+        if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+                gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+                int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+                if (ctx->bio)
+                        submit_bio(ctx->bio);
+
+                if (ctx->is_readahead) /* same as readahead_gfp_mask */
+                        gfp |= __GFP_NORETRY | __GFP_NOWARN;
+                ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+                ctx->bio->bi_opf = REQ_OP_READ;
+                if (ctx->is_readahead)
+                        ctx->bio->bi_opf |= REQ_RAHEAD;
+                ctx->bio->bi_iter.bi_sector = sector;
+                bio_set_dev(ctx->bio, iomap->bdev);
+                ctx->bio->bi_end_io = iomap_read_end_io;
+        }
+
+        __bio_add_page(ctx->bio, page, plen, poff);
+done:
+        return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+        struct iomap_readpage_ctx ctx = { .cur_page = page };
+        struct inode *inode = page->mapping->host;
+        unsigned poff;
+        loff_t ret;
+
+        WARN_ON_ONCE(page_has_buffers(page));
+
+        for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+                ret = iomap_apply(inode, page_offset(page) + poff,
+                                PAGE_SIZE - poff, 0, ops, &ctx,
+                                iomap_readpage_actor);
+                if (ret <= 0) {
+                        WARN_ON_ONCE(ret == 0);
+                        SetPageError(page);
+                        break;
+                }
+        }
+
+        if (ctx.bio) {
+                submit_bio(ctx.bio);
+                WARN_ON_ONCE(!ctx.cur_page_in_bio);
+        } else {
+                WARN_ON_ONCE(ctx.cur_page_in_bio);
+                unlock_page(page);
+        }
+
+        /*
+         * Just like mpage_readpages and block_read_full_page we always
+         * return 0 and just mark the page as PageError on errors.  This
+         * should be cleaned up all through the stack eventually.
+         */
+        return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+                loff_t length, loff_t *done)
+{
+        while (!list_empty(pages)) {
+                struct page *page = lru_to_page(pages);
+
+                if (page_offset(page) >= (u64)pos + length)
+                        break;
+
+                list_del(&page->lru);
+                if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+                                GFP_NOFS))
+                        return page;
+
+                /*
+                 * If we already have a page in the page cache at index we are
+                 * done.  Upper layers don't care if it is uptodate after the
+                 * readpages call itself as every page gets checked again once
+                 * actually needed.
+                 */
+                *done += PAGE_SIZE;
+                put_page(page);
+        }
+
+        return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+                void *data, struct iomap *iomap)
+{
+        struct iomap_readpage_ctx *ctx = data;
+        loff_t done, ret;
+
+        for (done = 0; done < length; done += ret) {
+                if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+                        if (!ctx->cur_page_in_bio)
+                                unlock_page(ctx->cur_page);
+                        put_page(ctx->cur_page);
+                        ctx->cur_page = NULL;
+                }
+                if (!ctx->cur_page) {
+                        ctx->cur_page = iomap_next_page(inode, ctx->pages,
+                                        pos, length, &done);
+                        if (!ctx->cur_page)
+                                break;
+                        ctx->cur_page_in_bio = false;
+                }
+                ret = iomap_readpage_actor(inode, pos + done, length - done,
+                                ctx, iomap);
+        }
+
+        return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+                unsigned nr_pages, const struct iomap_ops *ops)
+{
+        struct iomap_readpage_ctx ctx = {
+                .pages          = pages,
+                .is_readahead   = true,
+        };
+        loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+        loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+        loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+        while (length > 0) {
+                ret = iomap_apply(mapping->host, pos, length, 0, ops,
+                                &ctx, iomap_readpages_actor);
+                if (ret <= 0) {
+                        WARN_ON_ONCE(ret == 0);
+                        goto done;
+                }
+                pos += ret;
+                length -= ret;
+        }
+        ret = 0;
done:
+        if (ctx.bio)
+                submit_bio(ctx.bio);
+        if (ctx.cur_page) {
+                if (!ctx.cur_page_in_bio)
+                        unlock_page(ctx.cur_page);
+                put_page(ctx.cur_page);
+        }
+
+        /*
+         * Check that we didn't lose a page due to the arcane calling
+         * conventions.
+         */
+        WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+        return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1f36523d448a..30d314407f66 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -99,6 +99,9 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
                 const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+                unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
                 const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
--
cgit v1.2.3
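A filesystem adopts the two new helpers by pointing its address_space_operations at them and passing in its iomap_ops table. A minimal sketch, assuming a hypothetical examplefs_iomap_ops (the examplefs names are invented; only iomap_readpage/iomap_readpages are from the patch):

/* Hypothetical sketch -- examplefs_iomap_ops is an invented iomap_ops table. */
static int examplefs_readpage(struct file *file, struct page *page)
{
        return iomap_readpage(page, &examplefs_iomap_ops);
}

static int examplefs_readpages(struct file *file, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
{
        return iomap_readpages(mapping, pages, nr_pages, &examplefs_iomap_ops);
}

static const struct address_space_operations examplefs_aops = {
        .readpage       = examplefs_readpage,
        .readpages      = examplefs_readpages,
        /* ... remaining methods unchanged ... */
};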
From c03cea42149de56fbae2301d7123daaa2cfe80e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 19 Jun 2018 15:10:58 -0700
Subject: iomap: add initial support for writes without buffer heads

For now just limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin, and just set the whole page dirty after
copying data into it.  This code is enabled by default and XFS will now
be fed pages without buffer heads in ->writepage and ->writepages.

If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old
path will still be used; this both helps the transition in XFS and
prepares for the gfs2 migration to the iomap infrastructure.

Signed-off-by: Christoph Hellwig
Reviewed-by: Brian Foster
Reviewed-by: Darrick J. Wong
Signed-off-by: Darrick J. Wong
---
 fs/iomap.c            | 115 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_iomap.c    |   6 ++-
 include/linux/iomap.h |   2 +
 3 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4f10c6b1cf6d..2ebff76039b5 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -348,6 +348,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
         truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+                unsigned poff, unsigned plen, unsigned from, unsigned to,
+                struct iomap *iomap)
+{
+        struct bio_vec bvec;
+        struct bio bio;
+
+        if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+                zero_user_segments(page, poff, from, to, poff + plen);
+                return 0;
+        }
+
+        bio_init(&bio, &bvec, 1);
+        bio.bi_opf = REQ_OP_READ;
+        bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+        bio_set_dev(&bio, iomap->bdev);
+        __bio_add_page(&bio, page, plen, poff);
+        return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+                struct page *page, struct iomap *iomap)
+{
+        loff_t block_size = i_blocksize(inode);
+        loff_t block_start = pos & ~(block_size - 1);
+        loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+        unsigned poff = block_start & (PAGE_SIZE - 1);
+        unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+        unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+        WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+        if (PageUptodate(page))
+                return 0;
+        if (from <= poff && to >= poff + plen)
+                return 0;
+        return iomap_read_page_sync(inode, block_start, page,
+                        poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
                 struct page **pagep, struct iomap *iomap)
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
         if (iomap->type == IOMAP_INLINE)
                 iomap_read_inline_data(inode, page, iomap);
-        else
+        else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
                 status = __block_write_begin_int(page, pos, len, NULL, iomap);
-
+        else
+                status = __iomap_write_begin(inode, pos, len, page, iomap);
         if (unlikely(status)) {
                 unlock_page(page);
                 put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
         return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+        struct address_space *mapping = page_mapping(page);
+        int newly_dirty;
+
+        if (unlikely(!mapping))
+                return !TestSetPageDirty(page);
+
+        /*
+         * Lock out page->mem_cgroup migration to keep PageDirty
+         * synchronized with per-memcg dirty page counters.
+         */
+        lock_page_memcg(page);
+        newly_dirty = !TestSetPageDirty(page);
+        if (newly_dirty)
+                __set_page_dirty(page, mapping, 0);
+        unlock_page_memcg(page);
+
+        if (newly_dirty)
+                __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+        return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+                unsigned copied, struct page *page, struct iomap *iomap)
+{
+        flush_dcache_page(page);
+
+        /*
+         * The blocks that were entirely written will now be uptodate, so we
+         * don't have to worry about a readpage reading them and overwriting a
+         * partial write.  However if we have encountered a short write and only
+         * partially written into a block, it will not be marked uptodate, so a
+         * readpage might come in and destroy our partial write.
+         *
+         * Do the simplest thing, and just treat any short write to a non
+         * uptodate page as a zero-length write, and force the caller to redo
+         * the whole thing.
+         */
+        if (unlikely(copied < len && !PageUptodate(page))) {
+                copied = 0;
+        } else {
+                SetPageUptodate(page);
+                iomap_set_page_dirty(page);
+        }
+        return __generic_write_end(inode, pos, copied, page);
+}
+
 static int
 iomap_write_end_inline(struct inode *inode, struct page *page,
                 struct iomap *iomap, loff_t pos, unsigned copied)
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
         if (iomap->type == IOMAP_INLINE) {
                 ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-        } else {
+        } else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
                 ret = generic_write_end(NULL, inode->i_mapping, pos, len,
                                 copied, page, NULL);
+        } else {
+                ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
         }
 
         if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
         struct page *page = data;
         int ret;
 
-        ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-        if (ret)
-                return ret;
+        if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+                ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+                if (ret)
+                        return ret;
+                block_commit_write(page, 0, length);
+        } else {
+                WARN_ON_ONCE(!PageUptodate(page));
+                WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+        }
 
-        block_commit_write(page, 0, length);
         return length;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..8a3613d576af 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
          * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
          * them out if the write happens to fail.
          */
-        iomap->flags = IOMAP_F_NEW;
+        iomap->flags |= IOMAP_F_NEW;
         trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
         if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
 
+        iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
         if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
                         !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
                 /* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
         if (error)
                 return error;
 
-        iomap->flags = IOMAP_F_NEW;
+        iomap->flags |= IOMAP_F_NEW;
         trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 30d314407f66..5eb9ca8d7ce5 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW             0x01    /* blocks have been newly allocated */
 #define IOMAP_F_DIRTY           0x02    /* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD     0x04    /* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
@@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
                 unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
                 const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
--
cgit v1.2.3
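With this patch the buffer-head-less buffered write path is the default; a filesystem that still depends on buffer heads keeps the old behaviour by setting the new flag from ->iomap_begin, exactly as the xfs_file_iomap_begin hunk above does. A minimal hypothetical sketch (the examplefs name is invented):

/* Hypothetical sketch -- any not-yet-converted filesystem would do the same. */
static int examplefs_iomap_begin(struct inode *inode, loff_t pos,
                loff_t length, unsigned flags, struct iomap *iomap)
{
        /* Stay on the __block_write_begin_int()/generic_write_end() path. */
        iomap->flags |= IOMAP_F_BUFFER_HEAD;

        /* ... fill in the rest of the mapping as before ... */
        return 0;
}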