From 85f5a74c2b9ba213d4102dc12ccbfdbe26472abb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 8 Apr 2021 01:33:45 -0400 Subject: block: Add bio_add_folio() This is a thin wrapper around bio_add_page(). The main advantage here is the documentation that folios larger than 2GiB are not supported. It's not currently possible to allocate folios that large, but if it ever becomes possible, this function will fail gracefully instead of doing I/O to the wrong bytes. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- block/bio.c | 22 ++++++++++++++++++++++ include/linux/bio.h | 3 ++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index 15ab0d6d1c06..4b3087e20d51 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1033,6 +1033,28 @@ int bio_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_add_page); +/** + * bio_add_folio - Attempt to add part of a folio to a bio. + * @bio: BIO to add to. + * @folio: Folio to add. + * @len: How many bytes from the folio to add. + * @off: First byte in this folio to add. + * + * Filesystems that use folios can call this function instead of calling + * bio_add_page() for each page in the folio. If @off is bigger than + * PAGE_SIZE, this function can create a bio_vec that starts in a page + * after the bv_page. BIOs do not support folios that are 4GiB or larger. + * + * Return: Whether the addition was successful. + */ +bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len, + size_t off) +{ + if (len > UINT_MAX || off > UINT_MAX) + return 0; + return bio_add_page(bio, &folio->page, len, off) > 0; +} + void __bio_release_pages(struct bio *bio, bool mark_dirty) { struct bvec_iter_all iter_all; diff --git a/include/linux/bio.h b/include/linux/bio.h index fe6bdfbbef66..a783cac49978 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -409,7 +409,8 @@ extern void bio_uninit(struct bio *); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); +int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); +bool bio_add_folio(struct bio *, struct folio *, size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); int bio_add_zone_append_page(struct bio *bio, struct page *page, -- cgit v1.2.3 From 640d1930bef4f87ec8d8d2b05f0f6edc1dfcf662 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 4 Jan 2021 10:58:17 -0500 Subject: block: Add bio_for_each_folio_all() Allow callers to iterate over each folio instead of each page. The bio need not have been constructed using folios originally. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- Documentation/core-api/kernel-api.rst | 1 + include/linux/bio.h | 53 ++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 2e7186805148..7f0cb604b6ab 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -279,6 +279,7 @@ Accounting Framework Block Devices ============= +.. kernel-doc:: include/linux/bio.h .. kernel-doc:: block/blk-core.c :export: diff --git a/include/linux/bio.h b/include/linux/bio.h index a783cac49978..e3c9e8207f12 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -166,7 +166,7 @@ static inline void bio_advance(struct bio *bio, unsigned int nbytes) */ #define bio_for_each_bvec_all(bvl, bio, i) \ for (i = 0, bvl = bio_first_bvec_all(bio); \ - i < (bio)->bi_vcnt; i++, bvl++) \ + i < (bio)->bi_vcnt; i++, bvl++) #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) @@ -260,6 +260,57 @@ static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) return &bio->bi_io_vec[bio->bi_vcnt - 1]; } +/** + * struct folio_iter - State for iterating all folios in a bio. + * @folio: The current folio we're iterating. NULL after the last folio. + * @offset: The byte offset within the current folio. + * @length: The number of bytes in this iteration (will not cross folio + * boundary). + */ +struct folio_iter { + struct folio *folio; + size_t offset; + size_t length; + /* private: for use by the iterator */ + size_t _seg_count; + int _i; +}; + +static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, + int i) +{ + struct bio_vec *bvec = bio_first_bvec_all(bio) + i; + + fi->folio = page_folio(bvec->bv_page); + fi->offset = bvec->bv_offset + + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + fi->_seg_count = bvec->bv_len; + fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); + fi->_i = i; +} + +static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) +{ + fi->_seg_count -= fi->length; + if (fi->_seg_count) { + fi->folio = folio_next(fi->folio); + fi->offset = 0; + fi->length = min(folio_size(fi->folio), fi->_seg_count); + } else if (fi->_i + 1 < bio->bi_vcnt) { + bio_first_folio(fi, bio, fi->_i + 1); + } else { + fi->folio = NULL; + } +} + +/** + * bio_for_each_folio_all - Iterate over each folio in a bio. + * @fi: struct folio_iter which is updated for each folio. + * @bio: struct bio to iterate over. + */ +#define bio_for_each_folio_all(fi, bio) \ + for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ -- cgit v1.2.3 From d1bd0b4ebfe0521964e6937195bd2f76866660c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 3 Nov 2021 14:05:47 -0400 Subject: fs/buffer: Convert __block_write_begin_int() to take a folio There are no plans to convert buffer_head infrastructure to use large folios, but __block_write_begin_int() is called from iomap, and it's more convenient and less error-prone if we pass in a folio from iomap. It also has a nice saving of almost 200 bytes of code from removing repeated calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/buffer.c | 23 ++++++++++++----------- fs/internal.h | 2 +- fs/iomap/buffered-io.c | 7 +++++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 46bc589b7a03..8e112b6bd371 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1969,34 +1969,34 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, } } -int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, +int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap) { unsigned from = pos & (PAGE_SIZE - 1); unsigned to = from + len; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; unsigned block_start, block_end; sector_t block; int err = 0; unsigned blocksize, bbits; struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(from > PAGE_SIZE); BUG_ON(to > PAGE_SIZE); BUG_ON(from > to); - head = create_page_buffers(page, inode, 0); + head = create_page_buffers(&folio->page, inode, 0); blocksize = head->b_size; bbits = block_size_bits(blocksize); - block = (sector_t)page->index << (PAGE_SHIFT - bbits); + block = (sector_t)folio->index << (PAGE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; block++, block_start=block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } @@ -2016,20 +2016,20 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, if (buffer_new(bh)) { clean_bdev_bh_alias(bh); - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { clear_buffer_new(bh); set_buffer_uptodate(bh); mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) - zero_user_segments(page, + folio_zero_segments(folio, to, block_end, block_start, from); continue; } } - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); continue; @@ -2050,14 +2050,15 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, err = -EIO; } if (unlikely(err)) - page_zero_new_buffers(page, from, to); + page_zero_new_buffers(&folio->page, from, to); return err; } int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { - return __block_write_begin_int(page, pos, len, get_block, NULL); + return __block_write_begin_int(page_folio(page), pos, len, get_block, + NULL); } EXPORT_SYMBOL(__block_write_begin); diff --git a/fs/internal.h b/fs/internal.h index 7979ff8d168c..8590c973c2f4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -37,7 +37,7 @@ static inline int emergency_thaw_bdev(struct super_block *sb) /* * buffer.c */ -int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, +int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap); /* diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 71a36ae120ee..ecb65167715b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -603,6 +603,7 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, const struct iomap_page_ops *page_ops = iter->iomap.page_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); struct page *page; + struct folio *folio; int status = 0; BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); @@ -624,11 +625,12 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, status = -ENOMEM; goto out_no_page; } + folio = page_folio(page); if (srcmap->type == IOMAP_INLINE) status = iomap_write_begin_inline(iter, page); else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) - status = __block_write_begin_int(page, pos, len, NULL, srcmap); + status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else status = __iomap_write_begin(iter, pos, len, page); @@ -960,11 +962,12 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page); static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); loff_t length = iomap_length(iter); int ret; if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { - ret = __block_write_begin_int(page, iter->pos, length, NULL, + ret = __block_write_begin_int(folio, iter->pos, length, NULL, &iter->iomap); if (ret) return ret; -- cgit v1.2.3 From 95c4cd053a1d7c4f1e171ec31d2fb8a8f5c87efe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 22:56:17 -0400 Subject: iomap: Convert to_iomap_page to take a folio The big comment about only using a head page can go away now that it takes a folio argument. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ecb65167715b..101d5b0754e9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -22,8 +22,8 @@ #include "../internal.h" /* - * Structure allocated for each page or THP when block size < page size - * to track sub-page uptodate status and I/O completions. + * Structure allocated for each folio when block size < folio size + * to track sub-folio uptodate status and I/O completions. */ struct iomap_page { atomic_t read_bytes_pending; @@ -32,17 +32,10 @@ struct iomap_page { unsigned long uptodate[]; }; -static inline struct iomap_page *to_iomap_page(struct page *page) +static inline struct iomap_page *to_iomap_page(struct folio *folio) { - /* - * per-block data is stored in the head page. Callers should - * not be dealing with tail pages, and if they are, they can - * call thp_head() first. - */ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - - if (page_has_private(page)) - return (struct iomap_page *)page_private(page); + if (folio_test_private(folio)) + return folio_get_private(folio); return NULL; } @@ -51,7 +44,8 @@ static struct bio_set iomap_ioend_bioset; static struct iomap_page * iomap_page_create(struct inode *inode, struct page *page) { - struct iomap_page *iop = to_iomap_page(page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); unsigned int nr_blocks = i_blocks_per_page(inode, page); if (iop || nr_blocks <= 1) @@ -144,7 +138,8 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, static void iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len) { - struct iomap_page *iop = to_iomap_page(page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); struct inode *inode = page->mapping->host; unsigned first = off >> inode->i_blkbits; unsigned last = (off + len - 1) >> inode->i_blkbits; @@ -173,7 +168,8 @@ static void iomap_read_page_end_io(struct bio_vec *bvec, int error) { struct page *page = bvec->bv_page; - struct iomap_page *iop = to_iomap_page(page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); if (unlikely(error)) { ClearPageUptodate(page); @@ -438,7 +434,8 @@ int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count) { - struct iomap_page *iop = to_iomap_page(page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); struct inode *inode = page->mapping->host; unsigned len, first, last; unsigned i; @@ -1012,7 +1009,8 @@ static void iomap_finish_page_writeback(struct inode *inode, struct page *page, int error, unsigned int len) { - struct iomap_page *iop = to_iomap_page(page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); if (error) { SetPageError(page); -- cgit v1.2.3 From 435d44b3fd0ab5750b1001ac7105830e63ad0b5b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 23:12:52 -0400 Subject: iomap: Convert iomap_page_create to take a folio This function already assumed it was being passed a head page, so just formalise that. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 101d5b0754e9..d7823610da5c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -42,11 +42,10 @@ static inline struct iomap_page *to_iomap_page(struct folio *folio) static struct bio_set iomap_ioend_bioset; static struct iomap_page * -iomap_page_create(struct inode *inode, struct page *page) +iomap_page_create(struct inode *inode, struct folio *folio) { - struct folio *folio = page_folio(page); struct iomap_page *iop = to_iomap_page(folio); - unsigned int nr_blocks = i_blocks_per_page(inode, page); + unsigned int nr_blocks = i_blocks_per_folio(inode, folio); if (iop || nr_blocks <= 1) return iop; @@ -54,9 +53,9 @@ iomap_page_create(struct inode *inode, struct page *page) iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)), GFP_NOFS | __GFP_NOFAIL); spin_lock_init(&iop->uptodate_lock); - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) bitmap_fill(iop->uptodate, nr_blocks); - attach_page_private(page, iop); + folio_attach_private(folio, iop); return iop; } @@ -213,6 +212,7 @@ struct iomap_readpage_ctx { static int iomap_read_inline_data(const struct iomap_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; size_t poff = offset_in_page(iomap->offset); @@ -229,7 +229,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, if (WARN_ON_ONCE(size > iomap->length)) return -EIO; if (poff > 0) - iomap_page_create(iter->inode, page); + iomap_page_create(iter->inode, folio); addr = kmap_local_page(page) + poff; memcpy(addr, iomap->inline_data, size); @@ -256,6 +256,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, loff_t pos = iter->pos + offset; loff_t length = iomap_length(iter) - offset; struct page *page = ctx->cur_page; + struct folio *folio = page_folio(page); struct iomap_page *iop; loff_t orig_pos = pos; unsigned poff, plen; @@ -265,7 +266,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, return iomap_read_inline_data(iter, page); /* zero post-eof blocks as the page may be mapped */ - iop = iomap_page_create(iter->inode, page); + iop = iomap_page_create(iter->inode, folio); iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen); if (plen == 0) goto done; @@ -547,8 +548,9 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, unsigned len, struct page *page) { + struct folio *folio = page_folio(page); const struct iomap *srcmap = iomap_iter_srcmap(iter); - struct iomap_page *iop = iomap_page_create(iter->inode, page); + struct iomap_page *iop = iomap_page_create(iter->inode, folio); loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); @@ -1296,7 +1298,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, struct page *page, u64 end_offset) { - struct iomap_page *iop = iomap_page_create(inode, page); + struct folio *folio = page_folio(page); + struct iomap_page *iop = iomap_page_create(inode, folio); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); u64 file_offset; /* file offset of page */ -- cgit v1.2.3 From c46e8324cab0254060c27ab7ef879673468998c5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 27 Apr 2021 23:22:22 -0400 Subject: iomap: Convert iomap_page_release to take a folio iomap_page_release() was also assuming that it was being passed a head page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d7823610da5c..16604f605357 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -59,18 +59,18 @@ iomap_page_create(struct inode *inode, struct folio *folio) return iop; } -static void -iomap_page_release(struct page *page) +static void iomap_page_release(struct folio *folio) { - struct iomap_page *iop = detach_page_private(page); - unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page); + struct iomap_page *iop = folio_detach_private(folio); + struct inode *inode = folio->mapping->host; + unsigned int nr_blocks = i_blocks_per_folio(inode, folio); if (!iop) return; WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending)); WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending)); WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) != - PageUptodate(page)); + folio_test_uptodate(folio)); kfree(iop); } @@ -462,6 +462,8 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); int iomap_releasepage(struct page *page, gfp_t gfp_mask) { + struct folio *folio = page_folio(page); + trace_iomap_releasepage(page->mapping->host, page_offset(page), PAGE_SIZE); @@ -472,7 +474,7 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask) */ if (PageDirty(page) || PageWriteback(page)) return 0; - iomap_page_release(page); + iomap_page_release(folio); return 1; } EXPORT_SYMBOL_GPL(iomap_releasepage); @@ -480,6 +482,8 @@ EXPORT_SYMBOL_GPL(iomap_releasepage); void iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) { + struct folio *folio = page_folio(page); + trace_iomap_invalidatepage(page->mapping->host, offset, len); /* @@ -489,7 +493,7 @@ iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) if (offset == 0 && len == PAGE_SIZE) { WARN_ON_ONCE(PageWriteback(page)); cancel_dirty_page(page); - iomap_page_release(page); + iomap_page_release(folio); } } EXPORT_SYMBOL_GPL(iomap_invalidatepage); -- cgit v1.2.3 From 39f16c83453d6cdb601dc9cd51c8f321c14da644 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 07:51:36 -0400 Subject: iomap: Convert iomap_releasepage to use a folio This is an address_space operation, so its argument must remain as a struct page, but we can use a folio internally. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 16604f605357..b0192b148c9f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -464,15 +464,15 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask) { struct folio *folio = page_folio(page); - trace_iomap_releasepage(page->mapping->host, page_offset(page), - PAGE_SIZE); + trace_iomap_releasepage(folio->mapping->host, folio_pos(folio), + folio_size(folio)); /* * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(); skip those here. */ - if (PageDirty(page) || PageWriteback(page)) + if (folio_test_dirty(folio) || folio_test_writeback(folio)) return 0; iomap_page_release(folio); return 1; -- cgit v1.2.3 From 8306a5f56305521d8b307b4ee1f69949fbb49279 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 07:51:36 -0400 Subject: iomap: Add iomap_invalidate_folio Keep iomap_invalidatepage around as a wrapper for use in address_space operations. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 20 ++++++++++++-------- include/linux/iomap.h | 1 + 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b0192b148c9f..de7ce1909527 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -479,23 +479,27 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(iomap_releasepage); -void -iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) +void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) { - struct folio *folio = page_folio(page); - - trace_iomap_invalidatepage(page->mapping->host, offset, len); + trace_iomap_invalidatepage(folio->mapping->host, offset, len); /* * If we're invalidating the entire page, clear the dirty state from it * and release it to avoid unnecessary buildup of the LRU. */ - if (offset == 0 && len == PAGE_SIZE) { - WARN_ON_ONCE(PageWriteback(page)); - cancel_dirty_page(page); + if (offset == 0 && len == folio_size(folio)) { + WARN_ON_ONCE(folio_test_writeback(folio)); + folio_cancel_dirty(folio); iomap_page_release(folio); } } +EXPORT_SYMBOL_GPL(iomap_invalidate_folio); + +void iomap_invalidatepage(struct page *page, unsigned int offset, + unsigned int len) +{ + iomap_invalidate_folio(page_folio(page), offset, len); +} EXPORT_SYMBOL_GPL(iomap_invalidatepage); #ifdef CONFIG_MIGRATION diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6d1b08d0ae93..29491fb9c5ba 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -225,6 +225,7 @@ void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); int iomap_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); int iomap_releasepage(struct page *page, gfp_t gfp_mask); +void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); void iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len); #ifdef CONFIG_MIGRATION -- cgit v1.2.3 From cd1e5afe5503edea2538ba426905914d9ab36958 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 08:16:50 -0400 Subject: iomap: Pass the iomap_page into iomap_set_range_uptodate All but one caller already has the iomap_page, so we can avoid getting it again. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index de7ce1909527..856ef62b319e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -134,11 +134,9 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, *lenp = plen; } -static void -iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len) +static void iomap_iop_set_range_uptodate(struct page *page, + struct iomap_page *iop, unsigned off, unsigned len) { - struct folio *folio = page_folio(page); - struct iomap_page *iop = to_iomap_page(folio); struct inode *inode = page->mapping->host; unsigned first = off >> inode->i_blkbits; unsigned last = (off + len - 1) >> inode->i_blkbits; @@ -151,14 +149,14 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len) spin_unlock_irqrestore(&iop->uptodate_lock, flags); } -static void -iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len) +static void iomap_set_range_uptodate(struct page *page, + struct iomap_page *iop, unsigned off, unsigned len) { if (PageError(page)) return; - if (page_has_private(page)) - iomap_iop_set_range_uptodate(page, off, len); + if (iop) + iomap_iop_set_range_uptodate(page, iop, off, len); else SetPageUptodate(page); } @@ -174,7 +172,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error) ClearPageUptodate(page); SetPageError(page); } else { - iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); + iomap_set_range_uptodate(page, iop, bvec->bv_offset, + bvec->bv_len); } if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) @@ -213,6 +212,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, struct page *page) { struct folio *folio = page_folio(page); + struct iomap_page *iop; const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; size_t poff = offset_in_page(iomap->offset); @@ -229,13 +229,15 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, if (WARN_ON_ONCE(size > iomap->length)) return -EIO; if (poff > 0) - iomap_page_create(iter->inode, folio); + iop = iomap_page_create(iter->inode, folio); + else + iop = to_iomap_page(folio); addr = kmap_local_page(page) + poff; memcpy(addr, iomap->inline_data, size); memset(addr + size, 0, PAGE_SIZE - poff - size); kunmap_local(addr); - iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff); + iomap_set_range_uptodate(page, iop, poff, PAGE_SIZE - poff); return 0; } @@ -273,7 +275,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, if (iomap_block_needs_zeroing(iter, pos)) { zero_user(page, poff, plen); - iomap_set_range_uptodate(page, poff, plen); + iomap_set_range_uptodate(page, iop, poff, plen); goto done; } @@ -589,7 +591,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, if (status) return status; } - iomap_set_range_uptodate(page, poff, plen); + iomap_set_range_uptodate(page, iop, poff, plen); } while ((block_start += plen) < block_end); return 0; @@ -661,6 +663,8 @@ out_no_page: static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct page *page) { + struct folio *folio = page_folio(page); + struct iomap_page *iop = to_iomap_page(folio); flush_dcache_page(page); /* @@ -676,7 +680,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, */ if (unlikely(copied < len && !PageUptodate(page))) return 0; - iomap_set_range_uptodate(page, offset_in_page(pos), len); + iomap_set_range_uptodate(page, iop, offset_in_page(pos), len); __set_page_dirty_nobuffers(page); return copied; } -- cgit v1.2.3 From 8ffd74e9a8161df544ce63b49a5a092bcb18f8e6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 1 Jan 2021 16:53:26 -0500 Subject: iomap: Convert bio completions to use folios Use bio_for_each_folio() to iterate over each folio in the bio instead of iterating over each page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 856ef62b319e..5730a3317407 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -161,34 +161,29 @@ static void iomap_set_range_uptodate(struct page *page, SetPageUptodate(page); } -static void -iomap_read_page_end_io(struct bio_vec *bvec, int error) +static void iomap_finish_folio_read(struct folio *folio, size_t offset, + size_t len, int error) { - struct page *page = bvec->bv_page; - struct folio *folio = page_folio(page); struct iomap_page *iop = to_iomap_page(folio); if (unlikely(error)) { - ClearPageUptodate(page); - SetPageError(page); + folio_clear_uptodate(folio); + folio_set_error(folio); } else { - iomap_set_range_uptodate(page, iop, bvec->bv_offset, - bvec->bv_len); + iomap_set_range_uptodate(&folio->page, iop, offset, len); } - if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending)) - unlock_page(page); + if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending)) + folio_unlock(folio); } -static void -iomap_read_end_io(struct bio *bio) +static void iomap_read_end_io(struct bio *bio) { int error = blk_status_to_errno(bio->bi_status); - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) - iomap_read_page_end_io(bvec, error); + bio_for_each_folio_all(fi, bio) + iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); bio_put(bio); } @@ -1019,23 +1014,21 @@ out_unlock: } EXPORT_SYMBOL_GPL(iomap_page_mkwrite); -static void -iomap_finish_page_writeback(struct inode *inode, struct page *page, - int error, unsigned int len) +static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, + size_t len, int error) { - struct folio *folio = page_folio(page); struct iomap_page *iop = to_iomap_page(folio); if (error) { - SetPageError(page); + folio_set_error(folio); mapping_set_error(inode->i_mapping, error); } - WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop); + WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !iop); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0); if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending)) - end_page_writeback(page); + folio_end_writeback(folio); } /* @@ -1054,8 +1047,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) bool quiet = bio_flagged(bio, BIO_QUIET); for (bio = &ioend->io_inline_bio; bio; bio = next) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; /* * For the last bio, bi_private points to the ioend, so we @@ -1066,10 +1058,10 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) else next = bio->bi_private; - /* walk each page on bio, ending page IO on them */ - bio_for_each_segment_all(bv, bio, iter_all) - iomap_finish_page_writeback(inode, bv->bv_page, error, - bv->bv_len); + /* walk all folios in bio, ending page IO on them */ + bio_for_each_folio_all(fi, bio) + iomap_finish_folio_write(inode, fi.folio, fi.length, + error); bio_put(bio); } /* The ioend has been freed by bio_put() */ -- cgit v1.2.3 From 431c0566bb60780238534dc1fdd709acca1d0795 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 08:20:48 -0400 Subject: iomap: Use folio offsets instead of page offsets Pass a folio around instead of the page, and make sure the offset is relative to the start of the folio instead of the start of a page. Also use size_t for offset & length to make it clear that these are byte counts, and to support >2GB folios in the future. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 78 ++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 5730a3317407..06ff80c05340 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -75,18 +75,18 @@ static void iomap_page_release(struct folio *folio) } /* - * Calculate the range inside the page that we actually need to read. + * Calculate the range inside the folio that we actually need to read. */ -static void -iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, - loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp) +static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, + loff_t *pos, loff_t length, size_t *offp, size_t *lenp) { + struct iomap_page *iop = to_iomap_page(folio); loff_t orig_pos = *pos; loff_t isize = i_size_read(inode); unsigned block_bits = inode->i_blkbits; unsigned block_size = (1 << block_bits); - unsigned poff = offset_in_page(*pos); - unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length); + size_t poff = offset_in_folio(folio, *pos); + size_t plen = min_t(loff_t, folio_size(folio) - poff, length); unsigned first = poff >> block_bits; unsigned last = (poff + plen - 1) >> block_bits; @@ -124,7 +124,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, * page cache for blocks that are entirely outside of i_size. */ if (orig_pos <= isize && orig_pos + length > isize) { - unsigned end = offset_in_page(isize - 1) >> block_bits; + unsigned end = offset_in_folio(folio, isize - 1) >> block_bits; if (first <= end && last > end) plen -= (last - end) * block_size; @@ -134,31 +134,31 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, *lenp = plen; } -static void iomap_iop_set_range_uptodate(struct page *page, - struct iomap_page *iop, unsigned off, unsigned len) +static void iomap_iop_set_range_uptodate(struct folio *folio, + struct iomap_page *iop, size_t off, size_t len) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; unsigned first = off >> inode->i_blkbits; unsigned last = (off + len - 1) >> inode->i_blkbits; unsigned long flags; spin_lock_irqsave(&iop->uptodate_lock, flags); bitmap_set(iop->uptodate, first, last - first + 1); - if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page))) - SetPageUptodate(page); + if (bitmap_full(iop->uptodate, i_blocks_per_folio(inode, folio))) + folio_mark_uptodate(folio); spin_unlock_irqrestore(&iop->uptodate_lock, flags); } -static void iomap_set_range_uptodate(struct page *page, - struct iomap_page *iop, unsigned off, unsigned len) +static void iomap_set_range_uptodate(struct folio *folio, + struct iomap_page *iop, size_t off, size_t len) { - if (PageError(page)) + if (folio_test_error(folio)) return; if (iop) - iomap_iop_set_range_uptodate(page, iop, off, len); + iomap_iop_set_range_uptodate(folio, iop, off, len); else - SetPageUptodate(page); + folio_mark_uptodate(folio); } static void iomap_finish_folio_read(struct folio *folio, size_t offset, @@ -170,7 +170,7 @@ static void iomap_finish_folio_read(struct folio *folio, size_t offset, folio_clear_uptodate(folio); folio_set_error(folio); } else { - iomap_set_range_uptodate(&folio->page, iop, offset, len); + iomap_set_range_uptodate(folio, iop, offset, len); } if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending)) @@ -211,6 +211,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; size_t poff = offset_in_page(iomap->offset); + size_t offset = offset_in_folio(folio, iomap->offset); void *addr; if (PageUptodate(page)) @@ -223,7 +224,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, return -EIO; if (WARN_ON_ONCE(size > iomap->length)) return -EIO; - if (poff > 0) + if (offset > 0) iop = iomap_page_create(iter->inode, folio); else iop = to_iomap_page(folio); @@ -232,7 +233,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, memcpy(addr, iomap->inline_data, size); memset(addr + size, 0, PAGE_SIZE - poff - size); kunmap_local(addr); - iomap_set_range_uptodate(page, iop, poff, PAGE_SIZE - poff); + iomap_set_range_uptodate(folio, iop, offset, PAGE_SIZE - poff); return 0; } @@ -256,7 +257,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, struct folio *folio = page_folio(page); struct iomap_page *iop; loff_t orig_pos = pos; - unsigned poff, plen; + size_t poff, plen; sector_t sector; if (iomap->type == IOMAP_INLINE) @@ -264,13 +265,13 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, /* zero post-eof blocks as the page may be mapped */ iop = iomap_page_create(iter->inode, folio); - iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen); + iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); if (plen == 0) goto done; if (iomap_block_needs_zeroing(iter, pos)) { - zero_user(page, poff, plen); - iomap_set_range_uptodate(page, iop, poff, plen); + folio_zero_range(folio, poff, plen); + iomap_set_range_uptodate(folio, iop, poff, plen); goto done; } @@ -281,7 +282,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, sector = iomap_sector(iomap, pos); if (!ctx->bio || bio_end_sector(ctx->bio) != sector || - bio_add_page(ctx->bio, page, plen, poff) != plen) { + !bio_add_folio(ctx->bio, folio, plen, poff)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); @@ -305,8 +306,9 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, ctx->bio->bi_iter.bi_sector = sector; bio_set_dev(ctx->bio, iomap->bdev); ctx->bio->bi_end_io = iomap_read_end_io; - __bio_add_page(ctx->bio, page, plen, poff); + bio_add_folio(ctx->bio, folio, plen, poff); } + done: /* * Move the caller beyond our range so that it keeps making progress. @@ -535,9 +537,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) truncate_pagecache_range(inode, max(pos, i_size), pos + len); } -static int -iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, - unsigned plen, const struct iomap *iomap) +static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, + size_t poff, size_t plen, const struct iomap *iomap) { struct bio_vec bvec; struct bio bio; @@ -546,7 +547,7 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, bio.bi_opf = REQ_OP_READ; bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); bio_set_dev(&bio, iomap->bdev); - __bio_add_page(&bio, page, plen, poff); + bio_add_folio(&bio, folio, plen, poff); return submit_bio_wait(&bio); } @@ -559,14 +560,15 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); - unsigned from = offset_in_page(pos), to = from + len, poff, plen; + size_t from = offset_in_folio(folio, pos), to = from + len; + size_t poff, plen; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) return 0; - ClearPageError(page); + folio_clear_error(folio); do { - iomap_adjust_read_range(iter->inode, iop, &block_start, + iomap_adjust_read_range(iter->inode, folio, &block_start, block_end - block_start, &poff, &plen); if (plen == 0) break; @@ -579,14 +581,14 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, if (iomap_block_needs_zeroing(iter, block_start)) { if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE)) return -EIO; - zero_user_segments(page, poff, from, to, poff + plen); + folio_zero_segments(folio, poff, from, to, poff + plen); } else { - int status = iomap_read_page_sync(block_start, page, + int status = iomap_read_folio_sync(block_start, folio, poff, plen, srcmap); if (status) return status; } - iomap_set_range_uptodate(page, iop, poff, plen); + iomap_set_range_uptodate(folio, iop, poff, plen); } while ((block_start += plen) < block_end); return 0; @@ -675,7 +677,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, */ if (unlikely(copied < len && !PageUptodate(page))) return 0; - iomap_set_range_uptodate(page, iop, offset_in_page(pos), len); + iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len); __set_page_dirty_nobuffers(page); return copied; } -- cgit v1.2.3 From 874628a2c5900358ca89d733cc3865c15bdcd5d8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 23 Jul 2021 23:24:50 -0400 Subject: iomap: Convert iomap_read_inline_data to take a folio We still only support up to a single page of inline data (at least, per call to iomap_read_inline_data()), but it can now be written into the middle of a folio in case we decide to allocate a 16KiB page for a file that's 8.1KiB in size. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 06ff80c05340..2ebea02780b8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -197,16 +197,15 @@ struct iomap_readpage_ctx { /** * iomap_read_inline_data - copy inline data into the page cache * @iter: iteration structure - * @page: page to copy to + * @folio: folio to copy to * - * Copy the inline data in @iter into @page and zero out the rest of the page. + * Copy the inline data in @iter into @folio and zero out the rest of the folio. * Only a single IOMAP_INLINE extent is allowed at the end of each file. * Returns zero for success to complete the read, or the usual negative errno. */ static int iomap_read_inline_data(const struct iomap_iter *iter, - struct page *page) + struct folio *folio) { - struct folio *folio = page_folio(page); struct iomap_page *iop; const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; @@ -214,7 +213,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, size_t offset = offset_in_folio(folio, iomap->offset); void *addr; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) return 0; if (WARN_ON_ONCE(size > PAGE_SIZE - poff)) @@ -229,7 +228,7 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, else iop = to_iomap_page(folio); - addr = kmap_local_page(page) + poff; + addr = kmap_local_folio(folio, offset); memcpy(addr, iomap->inline_data, size); memset(addr + size, 0, PAGE_SIZE - poff - size); kunmap_local(addr); @@ -261,7 +260,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, sector_t sector; if (iomap->type == IOMAP_INLINE) - return iomap_read_inline_data(iter, page); + return iomap_read_inline_data(iter, folio); /* zero post-eof blocks as the page may be mapped */ iop = iomap_page_create(iter->inode, folio); @@ -597,10 +596,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, static int iomap_write_begin_inline(const struct iomap_iter *iter, struct page *page) { + struct folio *folio = page_folio(page); + /* needs more work for the tailpacking case; disable for now */ if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) return -EIO; - return iomap_read_inline_data(iter, page); + return iomap_read_inline_data(iter, folio); } static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, -- cgit v1.2.3 From 3aa9c659bf821f112ed765d822f67340a92b8b82 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 09:39:51 -0400 Subject: iomap: Convert readahead and readpage to use a folio Handle folios of arbitrary size instead of working in PAGE_SIZE units. readahead_folio() decreases the page refcount for you, so this is not quite a mechanical change. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 53 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 2ebea02780b8..ad89c20cb741 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -188,8 +188,8 @@ static void iomap_read_end_io(struct bio *bio) } struct iomap_readpage_ctx { - struct page *cur_page; - bool cur_page_in_bio; + struct folio *cur_folio; + bool cur_folio_in_bio; struct bio *bio; struct readahead_control *rac; }; @@ -252,8 +252,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos + offset; loff_t length = iomap_length(iter) - offset; - struct page *page = ctx->cur_page; - struct folio *folio = page_folio(page); + struct folio *folio = ctx->cur_folio; struct iomap_page *iop; loff_t orig_pos = pos; size_t poff, plen; @@ -274,7 +273,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, goto done; } - ctx->cur_page_in_bio = true; + ctx->cur_folio_in_bio = true; if (iop) atomic_add(plen, &iop->read_bytes_pending); @@ -282,7 +281,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, if (!ctx->bio || bio_end_sector(ctx->bio) != sector || !bio_add_folio(ctx->bio, folio, plen, poff)) { - gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); + gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); @@ -321,30 +320,31 @@ done: int iomap_readpage(struct page *page, const struct iomap_ops *ops) { + struct folio *folio = page_folio(page); struct iomap_iter iter = { - .inode = page->mapping->host, - .pos = page_offset(page), - .len = PAGE_SIZE, + .inode = folio->mapping->host, + .pos = folio_pos(folio), + .len = folio_size(folio), }; struct iomap_readpage_ctx ctx = { - .cur_page = page, + .cur_folio = folio, }; int ret; - trace_iomap_readpage(page->mapping->host, 1); + trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) iter.processed = iomap_readpage_iter(&iter, &ctx, 0); if (ret < 0) - SetPageError(page); + folio_set_error(folio); if (ctx.bio) { submit_bio(ctx.bio); - WARN_ON_ONCE(!ctx.cur_page_in_bio); + WARN_ON_ONCE(!ctx.cur_folio_in_bio); } else { - WARN_ON_ONCE(ctx.cur_page_in_bio); - unlock_page(page); + WARN_ON_ONCE(ctx.cur_folio_in_bio); + folio_unlock(folio); } /* @@ -363,15 +363,15 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter, loff_t done, ret; for (done = 0; done < length; done += ret) { - if (ctx->cur_page && offset_in_page(iter->pos + done) == 0) { - if (!ctx->cur_page_in_bio) - unlock_page(ctx->cur_page); - put_page(ctx->cur_page); - ctx->cur_page = NULL; + if (ctx->cur_folio && + offset_in_folio(ctx->cur_folio, iter->pos + done) == 0) { + if (!ctx->cur_folio_in_bio) + folio_unlock(ctx->cur_folio); + ctx->cur_folio = NULL; } - if (!ctx->cur_page) { - ctx->cur_page = readahead_page(ctx->rac); - ctx->cur_page_in_bio = false; + if (!ctx->cur_folio) { + ctx->cur_folio = readahead_folio(ctx->rac); + ctx->cur_folio_in_bio = false; } ret = iomap_readpage_iter(iter, ctx, done); if (ret <= 0) @@ -414,10 +414,9 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) if (ctx.bio) submit_bio(ctx.bio); - if (ctx.cur_page) { - if (!ctx.cur_page_in_bio) - unlock_page(ctx.cur_page); - put_page(ctx.cur_page); + if (ctx.cur_folio) { + if (!ctx.cur_folio_in_bio) + folio_unlock(ctx.cur_folio); } } EXPORT_SYMBOL_GPL(iomap_readahead); -- cgit v1.2.3 From ea0f843aa7942f169ff45d8c68aa81c1645772a6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Apr 2021 22:32:02 -0400 Subject: iomap: Convert iomap_page_mkwrite to use a folio If we write to any page in a folio, we have to mark the entire folio as dirty, and potentially COW the entire folio, because it'll all get written back as one unit. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ad89c20cb741..8d7a67655b60 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -967,10 +967,9 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, } EXPORT_SYMBOL_GPL(iomap_truncate_page); -static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter, - struct page *page) +static loff_t iomap_folio_mkwrite_iter(struct iomap_iter *iter, + struct folio *folio) { - struct folio *folio = page_folio(page); loff_t length = iomap_length(iter); int ret; @@ -979,10 +978,10 @@ static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter, &iter->iomap); if (ret) return ret; - block_commit_write(page, 0, length); + block_commit_write(&folio->page, 0, length); } else { - WARN_ON_ONCE(!PageUptodate(page)); - set_page_dirty(page); + WARN_ON_ONCE(!folio_test_uptodate(folio)); + folio_mark_dirty(folio); } return length; @@ -994,24 +993,24 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) .inode = file_inode(vmf->vma->vm_file), .flags = IOMAP_WRITE | IOMAP_FAULT, }; - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); ssize_t ret; - lock_page(page); - ret = page_mkwrite_check_truncate(page, iter.inode); + folio_lock(folio); + ret = folio_mkwrite_check_truncate(folio, iter.inode); if (ret < 0) goto out_unlock; - iter.pos = page_offset(page); + iter.pos = folio_pos(folio); iter.len = ret; while ((ret = iomap_iter(&iter, ops)) > 0) - iter.processed = iomap_page_mkwrite_iter(&iter, page); + iter.processed = iomap_folio_mkwrite_iter(&iter, folio); if (ret < 0) goto out_unlock; - wait_for_stable_page(page); + folio_wait_stable(folio); return VM_FAULT_LOCKED; out_unlock: - unlock_page(page); + folio_unlock(folio); return block_page_mkwrite_return(ret); } EXPORT_SYMBOL_GPL(iomap_page_mkwrite); -- cgit v1.2.3 From d454ab82bc7f4aa7af9f539d5cf9a1e237cdcbc2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Dec 2021 15:47:44 -0500 Subject: iomap: Allow iomap_write_begin() to be called with the full length In the future, we want write_begin to know the entire length of the write so that it can choose to allocate large folios. Pass the full length in from __iomap_zero_iter() and limit it where necessary. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 8d7a67655b60..b1ded5204d1c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -619,6 +619,9 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, if (fatal_signal_pending(current)) return -EINTR; + if (!mapping_large_folio_support(iter->inode->i_mapping)) + len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); + if (page_ops && page_ops->page_prepare) { status = page_ops->page_prepare(iter->inode, pos, len); if (status) @@ -632,6 +635,8 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, goto out_no_page; } folio = page_folio(page); + if (pos + len > folio_pos(folio) + folio_size(folio)) + len = folio_pos(folio) + folio_size(folio) - pos; if (srcmap->type == IOMAP_INLINE) status = iomap_write_begin_inline(iter, page); @@ -891,11 +896,13 @@ static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) struct page *page; int status; unsigned offset = offset_in_page(pos); - unsigned bytes = min_t(u64, PAGE_SIZE - offset, length); + unsigned bytes = min_t(u64, UINT_MAX, length); status = iomap_write_begin(iter, pos, bytes, &page); if (status) return status; + if (bytes > PAGE_SIZE - offset) + bytes = PAGE_SIZE - offset; zero_user(page, offset, bytes); mark_page_accessed(page); -- cgit v1.2.3 From a25def1fe56858efa40a8490e875da4a711487f8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 5 Nov 2021 14:24:09 -0400 Subject: iomap: Convert __iomap_zero_iter to use a folio The zero iterator can work in folio-sized chunks instead of page-sized chunks. This will save a lot of page cache lookups if the file is cached in large folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b1ded5204d1c..47cf558244f4 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -893,19 +893,23 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare); static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) { + struct folio *folio; struct page *page; int status; - unsigned offset = offset_in_page(pos); + size_t offset; unsigned bytes = min_t(u64, UINT_MAX, length); status = iomap_write_begin(iter, pos, bytes, &page); if (status) return status; - if (bytes > PAGE_SIZE - offset) - bytes = PAGE_SIZE - offset; + folio = page_folio(page); + + offset = offset_in_folio(folio, pos); + if (bytes > folio_size(folio) - offset) + bytes = folio_size(folio) - offset; - zero_user(page, offset, bytes); - mark_page_accessed(page); + folio_zero_range(folio, offset, bytes); + folio_mark_accessed(folio); return iomap_write_end(iter, pos, bytes, bytes, page); } -- cgit v1.2.3 From bc6123a84a71b5dd39192c02ea8f9f4266980b0e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 May 2021 11:33:08 -0400 Subject: iomap: Convert iomap_write_begin() and iomap_write_end() to folios These functions still only work in PAGE_SIZE chunks, but there are fewer conversions from tail to head pages as a result of this patch. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 71 +++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 47cf558244f4..d33d49440aa1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -550,9 +550,8 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, } static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, - unsigned len, struct page *page) + size_t len, struct folio *folio) { - struct folio *folio = page_folio(page); const struct iomap *srcmap = iomap_iter_srcmap(iter); struct iomap_page *iop = iomap_page_create(iter->inode, folio); loff_t block_size = i_blocksize(iter->inode); @@ -593,10 +592,8 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos, } static int iomap_write_begin_inline(const struct iomap_iter *iter, - struct page *page) + struct folio *folio) { - struct folio *folio = page_folio(page); - /* needs more work for the tailpacking case; disable for now */ if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) return -EIO; @@ -604,12 +601,12 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, } static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, - unsigned len, struct page **pagep) + size_t len, struct folio **foliop) { const struct iomap_page_ops *page_ops = iter->iomap.page_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); - struct page *page; struct folio *folio; + unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS; int status = 0; BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); @@ -628,32 +625,31 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, return status; } - page = grab_cache_page_write_begin(iter->inode->i_mapping, - pos >> PAGE_SHIFT, AOP_FLAG_NOFS); - if (!page) { + folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT, + fgp, mapping_gfp_mask(iter->inode->i_mapping)); + if (!folio) { status = -ENOMEM; goto out_no_page; } - folio = page_folio(page); if (pos + len > folio_pos(folio) + folio_size(folio)) len = folio_pos(folio) + folio_size(folio) - pos; if (srcmap->type == IOMAP_INLINE) - status = iomap_write_begin_inline(iter, page); + status = iomap_write_begin_inline(iter, folio); else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else - status = __iomap_write_begin(iter, pos, len, page); + status = __iomap_write_begin(iter, pos, len, folio); if (unlikely(status)) goto out_unlock; - *pagep = page; + *foliop = folio; return 0; out_unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); iomap_write_failed(iter->inode, pos, len); out_no_page: @@ -663,11 +659,10 @@ out_no_page: } static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, - size_t copied, struct page *page) + size_t copied, struct folio *folio) { - struct folio *folio = page_folio(page); struct iomap_page *iop = to_iomap_page(folio); - flush_dcache_page(page); + flush_dcache_folio(folio); /* * The blocks that were entirely written will now be uptodate, so we @@ -680,10 +675,10 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, * non-uptodate page as a zero-length write, and force the caller to * redo the whole thing. */ - if (unlikely(copied < len && !PageUptodate(page))) + if (unlikely(copied < len && !folio_test_uptodate(folio))) return 0; iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len); - __set_page_dirty_nobuffers(page); + filemap_dirty_folio(inode->i_mapping, folio); return copied; } @@ -707,7 +702,7 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter, /* Returns the number of bytes copied. May be 0. Cannot be an errno. */ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, - size_t copied, struct page *page) + size_t copied, struct folio *folio) { const struct iomap_page_ops *page_ops = iter->iomap.page_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); @@ -715,12 +710,12 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t ret; if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, page, pos, copied); + ret = iomap_write_end_inline(iter, &folio->page, pos, copied); } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, page, NULL); + copied, &folio->page, NULL); } else { - ret = __iomap_write_end(iter->inode, pos, len, copied, page); + ret = __iomap_write_end(iter->inode, pos, len, copied, folio); } /* @@ -732,13 +727,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, i_size_write(iter->inode, pos + ret); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - unlock_page(page); + folio_unlock(folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); if (page_ops && page_ops->page_done) - page_ops->page_done(iter->inode, pos, ret, page); - put_page(page); + page_ops->page_done(iter->inode, pos, ret, &folio->page); + folio_put(folio); if (ret < len) iomap_write_failed(iter->inode, pos, len); @@ -753,6 +748,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) long status = 0; do { + struct folio *folio; struct page *page; unsigned long offset; /* Offset into pagecache page */ unsigned long bytes; /* Bytes to write to page */ @@ -776,16 +772,17 @@ again: break; } - status = iomap_write_begin(iter, pos, bytes, &page); + status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) break; + page = folio_file_page(folio, pos >> PAGE_SHIFT); if (mapping_writably_mapped(iter->inode->i_mapping)) flush_dcache_page(page); copied = copy_page_from_iter_atomic(page, offset, bytes, i); - status = iomap_write_end(iter, pos, bytes, copied, page); + status = iomap_write_end(iter, pos, bytes, copied, folio); if (unlikely(copied != status)) iov_iter_revert(i, copied - status); @@ -851,13 +848,13 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) do { unsigned long offset = offset_in_page(pos); unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length); - struct page *page; + struct folio *folio; - status = iomap_write_begin(iter, pos, bytes, &page); + status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) return status; - status = iomap_write_end(iter, pos, bytes, bytes, page); + status = iomap_write_end(iter, pos, bytes, bytes, folio); if (WARN_ON_ONCE(status == 0)) return -EIO; @@ -894,15 +891,13 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare); static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) { struct folio *folio; - struct page *page; int status; size_t offset; - unsigned bytes = min_t(u64, UINT_MAX, length); + size_t bytes = min_t(u64, SIZE_MAX, length); - status = iomap_write_begin(iter, pos, bytes, &page); + status = iomap_write_begin(iter, pos, bytes, &folio); if (status) return status; - folio = page_folio(page); offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) @@ -911,7 +906,7 @@ static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - return iomap_write_end(iter, pos, bytes, bytes, page); + return iomap_write_end(iter, pos, bytes, bytes, folio); } static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) -- cgit v1.2.3 From 9c4ce08dd21145d10775c6ce6f21330a9558f8d9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 May 2021 11:44:44 -0400 Subject: iomap: Convert iomap_write_end_inline to take a folio This conversion is only safe because iomap only supports writes to inline data which starts at the beginning of the file. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d33d49440aa1..b78a456e696f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -683,16 +683,16 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, } static size_t iomap_write_end_inline(const struct iomap_iter *iter, - struct page *page, loff_t pos, size_t copied) + struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; void *addr; - WARN_ON_ONCE(!PageUptodate(page)); + WARN_ON_ONCE(!folio_test_uptodate(folio)); BUG_ON(!iomap_inline_data_valid(iomap)); - flush_dcache_page(page); - addr = kmap_local_page(page) + pos; + flush_dcache_folio(folio); + addr = kmap_local_folio(folio, pos); memcpy(iomap_inline_data(iomap, pos), addr, copied); kunmap_local(addr); @@ -710,7 +710,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t ret; if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, &folio->page, pos, copied); + ret = iomap_write_end_inline(iter, folio, pos, copied); } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, copied, &folio->page, NULL); -- cgit v1.2.3 From 6e478521df535b9d5ef5eb84d4352f235bbbef99 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 30 Jul 2021 09:56:05 -0400 Subject: iomap,xfs: Convert ->discard_page to ->discard_folio XFS has the only implementation of ->discard_page today, so convert it to use folios in the same patch as converting the API. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 4 ++-- fs/xfs/xfs_aops.c | 24 ++++++++++++------------ include/linux/iomap.h | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b78a456e696f..b403b83eedaf 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1360,8 +1360,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, * won't be affected by I/O completion and we must unlock it * now. */ - if (wpc->ops->discard_page) - wpc->ops->discard_page(page, file_offset); + if (wpc->ops->discard_folio) + wpc->ops->discard_folio(folio, file_offset); if (!count) { ClearPageUptodate(page); unlock_page(page); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index c8c15c3c3147..4098a9875c5b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -437,37 +437,37 @@ xfs_prepare_ioend( * see a ENOSPC in writeback). */ static void -xfs_discard_page( - struct page *page, - loff_t fileoff) +xfs_discard_folio( + struct folio *folio, + loff_t pos) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - unsigned int pageoff = offset_in_page(fileoff); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff); + size_t offset = offset_in_folio(folio, pos); + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); + xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset); int error; if (xfs_is_shutdown(mp)) goto out_invalidate; xfs_alert_ratelimited(mp, - "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", - page, ip->i_ino, fileoff); + "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", + folio, ip->i_ino, pos); error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_page(inode, page) - pageoff_fsb); + i_blocks_per_folio(inode, folio) - pageoff_fsb); if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); out_invalidate: - iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff); + iomap_invalidate_folio(folio, offset, folio_size(folio) - offset); } static const struct iomap_writeback_ops xfs_writeback_ops = { .map_blocks = xfs_map_blocks, .prepare_ioend = xfs_prepare_ioend, - .discard_page = xfs_discard_page, + .discard_folio = xfs_discard_folio, }; STATIC int diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 29491fb9c5ba..5ef5088dbbd8 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -285,7 +285,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page, loff_t fileoff); + void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { -- cgit v1.2.3 From 926550362d609bba6aa3f8cab99ae324adadc343 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 2 Nov 2021 10:51:55 -0400 Subject: iomap: Simplify iomap_writepage_map() Rename end_offset to end_pos and file_offset to pos to match the rest of the file. Simplify the loop by calculating nblocks up front instead of each time around the loop. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b403b83eedaf..682e15e50cf6 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1307,37 +1307,36 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, - struct page *page, u64 end_offset) + struct page *page, u64 end_pos) { struct folio *folio = page_folio(page); struct iomap_page *iop = iomap_page_create(inode, folio); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); - u64 file_offset; /* file offset of page */ + unsigned nblocks = i_blocks_per_folio(inode, folio); + u64 pos = folio_pos(folio); int error = 0, count = 0, i; LIST_HEAD(submit_list); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0); /* - * Walk through the page to find areas to write back. If we run off the - * end of the current map or find the current map invalid, grab a new - * one. + * Walk through the folio to find areas to write back. If we + * run off the end of the current map or find the current map + * invalid, grab a new one. */ - for (i = 0, file_offset = page_offset(page); - i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset; - i++, file_offset += len) { + for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) { if (iop && !test_bit(i, iop->uptodate)) continue; - error = wpc->ops->map_blocks(wpc, inode, file_offset); + error = wpc->ops->map_blocks(wpc, inode, pos); if (error) break; if (WARN_ON_ONCE(wpc->iomap.type == IOMAP_INLINE)) continue; if (wpc->iomap.type == IOMAP_HOLE) continue; - iomap_add_to_ioend(inode, file_offset, page, iop, wpc, wbc, + iomap_add_to_ioend(inode, pos, page, iop, wpc, wbc, &submit_list); count++; } @@ -1361,7 +1360,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, * now. */ if (wpc->ops->discard_folio) - wpc->ops->discard_folio(folio, file_offset); + wpc->ops->discard_folio(folio, pos); if (!count) { ClearPageUptodate(page); unlock_page(page); -- cgit v1.2.3 From 81d4782a741b21c101eb368c120f65f7d624c219 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 2 Nov 2021 11:41:16 -0400 Subject: iomap: Simplify iomap_do_writepage() Rename end_offset to end_pos and offset_into_page to poff to match the rest of the file. Simplify the handling of the last page straddling i_size by doing the EOF check based on the byte granularity i_size instead of converting to a pgoff prematurely. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 682e15e50cf6..46d458a63b86 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1408,9 +1408,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) { struct iomap_writepage_ctx *wpc = data; struct inode *inode = page->mapping->host; - pgoff_t end_index; - u64 end_offset; - loff_t offset; + u64 end_pos, isize; trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE); @@ -1441,11 +1439,9 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * | desired writeback range | see else | * ---------------------------------^------------------| */ - offset = i_size_read(inode); - end_index = offset >> PAGE_SHIFT; - if (page->index < end_index) - end_offset = (loff_t)(page->index + 1) << PAGE_SHIFT; - else { + isize = i_size_read(inode); + end_pos = page_offset(page) + PAGE_SIZE; + if (end_pos > isize) { /* * Check whether the page to write out is beyond or straddles * i_size or not. @@ -1457,7 +1453,8 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * | | Straddles | * ---------------------------------^-----------|--------| */ - unsigned offset_into_page = offset & (PAGE_SIZE - 1); + size_t poff = offset_in_page(isize); + pgoff_t end_index = isize >> PAGE_SHIFT; /* * Skip the page if it's fully outside i_size, e.g. due to a @@ -1477,7 +1474,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * offset is just equal to the EOF. */ if (page->index > end_index || - (page->index == end_index && offset_into_page == 0)) + (page->index == end_index && poff == 0)) goto redirty; /* @@ -1488,13 +1485,13 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * memory is zeroed when mapped, and writes to that region are * not written out to the file." */ - zero_user_segment(page, offset_into_page, PAGE_SIZE); + zero_user_segment(page, poff, PAGE_SIZE); /* Adjust the end_offset to the end of file */ - end_offset = offset; + end_pos = isize; } - return iomap_writepage_map(wpc, wbc, inode, page, end_offset); + return iomap_writepage_map(wpc, wbc, inode, page, end_pos); redirty: redirty_page_for_writepage(wbc, page); -- cgit v1.2.3 From e735c0079465900d78d687f00aba625d46426b29 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 2 Nov 2021 12:45:12 -0400 Subject: iomap: Convert iomap_add_to_ioend() to take a folio We still iterate one block at a time, but now we call compound_head() less often. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 70 ++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 46d458a63b86..e7f28f40f1d0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1263,29 +1263,29 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset, * first; otherwise finish off the current ioend and start another. */ static void -iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, +iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio, struct iomap_page *iop, struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct list_head *iolist) { - sector_t sector = iomap_sector(&wpc->iomap, offset); + sector_t sector = iomap_sector(&wpc->iomap, pos); unsigned len = i_blocksize(inode); - unsigned poff = offset & (PAGE_SIZE - 1); + size_t poff = offset_in_folio(folio, pos); - if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, offset, sector)) { + if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, sector)) { if (wpc->ioend) list_add(&wpc->ioend->io_list, iolist); - wpc->ioend = iomap_alloc_ioend(inode, wpc, offset, sector, wbc); + wpc->ioend = iomap_alloc_ioend(inode, wpc, pos, sector, wbc); } - if (bio_add_page(wpc->ioend->io_bio, page, len, poff) != len) { + if (!bio_add_folio(wpc->ioend->io_bio, folio, len, poff)) { wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio); - __bio_add_page(wpc->ioend->io_bio, page, len, poff); + bio_add_folio(wpc->ioend->io_bio, folio, len, poff); } if (iop) atomic_add(len, &iop->write_bytes_pending); wpc->ioend->io_size += len; - wbc_account_cgroup_owner(wbc, page, len); + wbc_account_cgroup_owner(wbc, &folio->page, len); } /* @@ -1307,9 +1307,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct inode *inode, - struct page *page, u64 end_pos) + struct folio *folio, u64 end_pos) { - struct folio *folio = page_folio(page); struct iomap_page *iop = iomap_page_create(inode, folio); struct iomap_ioend *ioend, *next; unsigned len = i_blocksize(inode); @@ -1336,15 +1335,15 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, continue; if (wpc->iomap.type == IOMAP_HOLE) continue; - iomap_add_to_ioend(inode, pos, page, iop, wpc, wbc, + iomap_add_to_ioend(inode, pos, folio, iop, wpc, wbc, &submit_list); count++; } WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list)); - WARN_ON_ONCE(!PageLocked(page)); - WARN_ON_ONCE(PageWriteback(page)); - WARN_ON_ONCE(PageDirty(page)); + WARN_ON_ONCE(!folio_test_locked(folio)); + WARN_ON_ONCE(folio_test_writeback(folio)); + WARN_ON_ONCE(folio_test_dirty(folio)); /* * We cannot cancel the ioend directly here on error. We may have @@ -1362,14 +1361,14 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, if (wpc->ops->discard_folio) wpc->ops->discard_folio(folio, pos); if (!count) { - ClearPageUptodate(page); - unlock_page(page); + folio_clear_uptodate(folio); + folio_unlock(folio); goto done; } } - set_page_writeback(page); - unlock_page(page); + folio_start_writeback(folio); + folio_unlock(folio); /* * Preserve the original error if there was one; catch @@ -1390,9 +1389,9 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, * with a partial page truncate on a sub-page block sized filesystem. */ if (!count) - end_page_writeback(page); + folio_end_writeback(folio); done: - mapping_set_error(page->mapping, error); + mapping_set_error(folio->mapping, error); return error; } @@ -1406,14 +1405,15 @@ done: static int iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) { + struct folio *folio = page_folio(page); struct iomap_writepage_ctx *wpc = data; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; u64 end_pos, isize; - trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE); + trace_iomap_writepage(inode, folio_pos(folio), folio_size(folio)); /* - * Refuse to write the page out if we're called from reclaim context. + * Refuse to write the folio out if we're called from reclaim context. * * This avoids stack overflows when called from deeply used stacks in * random callers for direct reclaim or memcg reclaim. We explicitly @@ -1427,10 +1427,10 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) goto redirty; /* - * Is this page beyond the end of the file? + * Is this folio beyond the end of the file? * - * The page index is less than the end_index, adjust the end_offset - * to the highest offset that this page should represent. + * The folio index is less than the end_index, adjust the end_pos + * to the highest offset that this folio should represent. * ----------------------------------------------------- * | file mapping | | * ----------------------------------------------------- @@ -1440,7 +1440,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * ---------------------------------^------------------| */ isize = i_size_read(inode); - end_pos = page_offset(page) + PAGE_SIZE; + end_pos = folio_pos(folio) + folio_size(folio); if (end_pos > isize) { /* * Check whether the page to write out is beyond or straddles @@ -1453,7 +1453,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * | | Straddles | * ---------------------------------^-----------|--------| */ - size_t poff = offset_in_page(isize); + size_t poff = offset_in_folio(folio, isize); pgoff_t end_index = isize >> PAGE_SHIFT; /* @@ -1473,8 +1473,8 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * checking if the page is totally beyond i_size or if its * offset is just equal to the EOF. */ - if (page->index > end_index || - (page->index == end_index && poff == 0)) + if (folio->index > end_index || + (folio->index == end_index && poff == 0)) goto redirty; /* @@ -1485,17 +1485,15 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) * memory is zeroed when mapped, and writes to that region are * not written out to the file." */ - zero_user_segment(page, poff, PAGE_SIZE); - - /* Adjust the end_offset to the end of file */ + folio_zero_segment(folio, poff, folio_size(folio)); end_pos = isize; } - return iomap_writepage_map(wpc, wbc, inode, page, end_pos); + return iomap_writepage_map(wpc, wbc, inode, folio, end_pos); redirty: - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); return 0; } -- cgit v1.2.3 From 589110e897ff9cf7c3500c5fce1c688d1ffca6f4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 7 May 2021 15:08:09 -0400 Subject: iomap: Convert iomap_migrate_page() to use folios The arguments are still pages for now, but we can use folios internally and cut out a lot of calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index e7f28f40f1d0..ed796055e578 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -504,19 +504,21 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { + struct folio *folio = page_folio(page); + struct folio *newfolio = page_folio(newpage); int ret; - ret = migrate_page_move_mapping(mapping, newpage, page, 0); + ret = folio_migrate_mapping(mapping, newfolio, folio, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; - if (page_has_private(page)) - attach_page_private(newpage, detach_page_private(page)); + if (folio_test_private(folio)) + folio_attach_private(newfolio, folio_detach_private(folio)); if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(newfolio, folio); else - migrate_page_states(newpage, page); + folio_migrate_flags(newfolio, folio); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL_GPL(iomap_migrate_page); -- cgit v1.2.3 From 60d8231089f0d955e0cce033421df4b19e9adfb0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 13 Jan 2021 10:48:49 -0500 Subject: iomap: Support large folios in invalidatepage If we're punching a hole in a large folio, we need to remove the per-folio iomap data as the folio is about to be split and each page will need its own. If a dirty folio is only partially-uptodate, the iomap data contains the information about which blocks cannot be written back, so assert that a dirty folio is fully uptodate. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ed796055e578..ba80bedd9590 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -481,13 +481,18 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) trace_iomap_invalidatepage(folio->mapping->host, offset, len); /* - * If we're invalidating the entire page, clear the dirty state from it - * and release it to avoid unnecessary buildup of the LRU. + * If we're invalidating the entire folio, clear the dirty state + * from it and release it to avoid unnecessary buildup of the LRU. */ if (offset == 0 && len == folio_size(folio)) { WARN_ON_ONCE(folio_test_writeback(folio)); folio_cancel_dirty(folio); iomap_page_release(folio); + } else if (folio_test_large(folio)) { + /* Must release the iop so the page can be split */ + WARN_ON_ONCE(!folio_test_uptodate(folio) && + folio_test_dirty(folio)); + iomap_page_release(folio); } } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); -- cgit v1.2.3 From 6795801366da0cd3d99e27c37f020a8f16714886 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 19 May 2021 18:38:43 -0400 Subject: xfs: Support large folios Now that iomap has been converted, XFS is large folio safe. Indicate to the VFS that it can now create large folios for XFS. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_icache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index da4af2142a2b..cdc39f576ca1 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -87,6 +87,7 @@ xfs_inode_alloc( /* VFS doesn't initialise i_mode or i_state! */ VFS_I(ip)->i_mode = 0; VFS_I(ip)->i_state = 0; + mapping_set_large_folios(VFS_I(ip)->i_mapping); XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); @@ -320,6 +321,7 @@ xfs_reinit_inode( inode->i_rdev = dev; inode->i_uid = uid; inode->i_gid = gid; + mapping_set_large_folios(inode->i_mapping); return error; } -- cgit v1.2.3 From 4d7bd0eb72e5831ddb1288786a96448b48440825 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 20 Dec 2021 19:03:46 -0500 Subject: iomap: Inline __iomap_zero_iter into its caller To make the merge easier, replicate the inlining of __iomap_zero_iter() into iomap_zero_iter() that is currently in the nvdimm tree. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong --- fs/iomap/buffered-io.c | 55 +++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ba80bedd9590..c6b3a148e898 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -895,27 +895,6 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, } EXPORT_SYMBOL_GPL(iomap_file_unshare); -static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length) -{ - struct folio *folio; - int status; - size_t offset; - size_t bytes = min_t(u64, SIZE_MAX, length); - - status = iomap_write_begin(iter, pos, bytes, &folio); - if (status) - return status; - - offset = offset_in_folio(folio, pos); - if (bytes > folio_size(folio) - offset) - bytes = folio_size(folio) - offset; - - folio_zero_range(folio, offset, bytes); - folio_mark_accessed(folio); - - return iomap_write_end(iter, pos, bytes, bytes, folio); -} - static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) { struct iomap *iomap = &iter->iomap; @@ -929,14 +908,34 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) return length; do { - s64 bytes; + struct folio *folio; + int status; + size_t offset; + size_t bytes = min_t(u64, SIZE_MAX, length); + + if (IS_DAX(iter->inode)) { + s64 tmp = dax_iomap_zero(pos, bytes, iomap); + if (tmp < 0) + return tmp; + bytes = tmp; + goto good; + } - if (IS_DAX(iter->inode)) - bytes = dax_iomap_zero(pos, length, iomap); - else - bytes = __iomap_zero_iter(iter, pos, length); - if (bytes < 0) - return bytes; + status = iomap_write_begin(iter, pos, bytes, &folio); + if (status) + return status; + + offset = offset_in_folio(folio, pos); + if (bytes > folio_size(folio) - offset) + bytes = folio_size(folio) - offset; + + folio_zero_range(folio, offset, bytes); + folio_mark_accessed(folio); + + bytes = iomap_write_end(iter, pos, bytes, bytes, folio); +good: + if (WARN_ON_ONCE(bytes == 0)) + return -EIO; pos += bytes; length -= bytes; -- cgit v1.2.3