From fa0bdd45d7e3703826ea75f5fe3359865d75c319 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jan 2026 06:53:32 +0100 Subject: block: add a BIO_MAX_SIZE constant and use it Currently the only constant for the maximum bio size is BIO_MAX_SECTORS, which is in units of 512-byte sectors, but a lot of users need a byte limit. Add a BIO_MAX_SIZE constant, redefine BIO_MAX_SECTORS in terms of it, and switch all bio-related uses of UINT_MAX for the maximum size to use the symbolic names instead. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Darrick J. Wong Reviewed-by: Anuj Gupta Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 19a888a2f104..d59553324a84 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -281,7 +281,8 @@ struct bio { }; #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) -#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) +#define BIO_MAX_SIZE UINT_MAX /* max value of bi_iter.bi_size */ +#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> SECTOR_SHIFT) static inline struct bio_vec *bio_inline_vecs(struct bio *bio) { -- cgit v1.2.3 From 91b73c458182801a8c9cf6135335e064567d1013 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jan 2026 06:53:35 +0100 Subject: iov_iter: extract a iov_iter_extract_bvecs helper from bio code Massage __bio_iov_iter_get_pages so that it doesn't need the bio, and move it to lib/iov_iter.c so that it can be used by block code for other things than filling a bio and by other subsystems like netfs. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/bio.c | 120 ++++++++-------------------------------------------- include/linux/uio.h | 3 ++ lib/iov_iter.c | 98 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 4591f0ba90f5..530082c8cf0c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1172,102 +1172,6 @@ void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter) bio_set_flag(bio, BIO_CLONED); } -static unsigned int get_contig_folio_len(struct page **pages, - unsigned int *num_pages, size_t left, - size_t offset) -{ - struct folio *folio = page_folio(pages[0]); - size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, left); - unsigned int max_pages, i; - size_t folio_offset, len; - - folio_offset = PAGE_SIZE * folio_page_idx(folio, pages[0]) + offset; - len = min(folio_size(folio) - folio_offset, left); - - /* - * We might COW a single page in the middle of a large folio, so we have - * to check that all pages belong to the same folio. - */ - left -= contig_sz; - max_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - for (i = 1; i < max_pages; i++) { - size_t next = min_t(size_t, PAGE_SIZE, left); - - if (page_folio(pages[i]) != folio || - pages[i] != pages[i - 1] + 1) - break; - contig_sz += next; - left -= next; - } - - *num_pages = i; - return contig_sz; -} - -#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) - -/** - * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio - * @bio: bio to add pages to - * @iter: iov iterator describing the region to be mapped - * - * Extracts pages from *iter and appends them to @bio's bvec array. The pages - * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag. - * For a multi-segment *iter, this function only adds pages from the next - * non-empty segment of the iov iterator. 
- */ -static ssize_t __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) -{ - iov_iter_extraction_t extraction_flags = 0; - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; - unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; - struct page **pages = (struct page **)bv; - ssize_t size; - unsigned int i = 0; - size_t offset, left, len; - - /* - * Move page array up in the allocated memory for the bio vecs as far as - * possible so that we can start filling biovecs from the beginning - * without overwriting the temporary page array. - */ - BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); - pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - - if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue)) - extraction_flags |= ITER_ALLOW_P2PDMA; - - size = iov_iter_extract_pages(iter, &pages, - BIO_MAX_SIZE - bio->bi_iter.bi_size, - nr_pages, extraction_flags, &offset); - if (unlikely(size <= 0)) - return size ? 
size : -EFAULT; - - nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); - for (left = size; left > 0; left -= len) { - unsigned int nr_to_add; - - if (bio->bi_vcnt > 0) { - struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - if (!zone_device_pages_have_same_pgmap(prev->bv_page, - pages[i])) - break; - } - - len = get_contig_folio_len(&pages[i], &nr_to_add, left, offset); - __bio_add_page(bio, pages[i], len, offset); - i += nr_to_add; - offset = 0; - } - - iov_iter_revert(iter, left); - while (i < nr_pages) - bio_release_page(bio, pages[i++]); - return size - left; -} - /* * Aligns the bio size to the len_align_mask, releasing excessive bio vecs that * __bio_iov_iter_get_pages may have inserted, and reverts the trimmed length @@ -1325,7 +1229,7 @@ static int bio_iov_iter_align_down(struct bio *bio, struct iov_iter *iter, int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, unsigned len_align_mask) { - ssize_t ret; + iov_iter_extraction_t flags = 0; if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return -EIO; @@ -1338,14 +1242,26 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, if (iov_iter_extract_will_pin(iter)) bio_set_flag(bio, BIO_PAGE_PINNED); + if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue)) + flags |= ITER_ALLOW_P2PDMA; do { - ret = __bio_iov_iter_get_pages(bio, iter); - } while (ret > 0 && iov_iter_count(iter) && !bio_full(bio, 0)); + ssize_t ret; + + ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec, + BIO_MAX_SIZE - bio->bi_iter.bi_size, + &bio->bi_vcnt, bio->bi_max_vecs, flags); + if (ret <= 0) { + if (!bio->bi_vcnt) + return ret; + break; + } + bio->bi_iter.bi_size += ret; + } while (iov_iter_count(iter) && !bio_full(bio, 0)); - if (bio->bi_vcnt) - return bio_iov_iter_align_down(bio, iter, len_align_mask); - return ret; + if (is_pci_p2pdma_page(bio->bi_io_vec->bv_page)) + bio->bi_opf |= REQ_NOMERGE; + return bio_iov_iter_align_down(bio, iter, len_align_mask); } static void 
submit_bio_wait_endio(struct bio *bio) diff --git a/include/linux/uio.h b/include/linux/uio.h index 5b127043a151..a9bc5b3067e3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -389,6 +389,9 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, iov_iter_extraction_t extraction_flags, size_t *offset0); +ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv, + size_t max_size, unsigned short *nr_vecs, + unsigned short max_vecs, iov_iter_extraction_t extraction_flags); /** * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 896760bad455..545250507f08 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1845,3 +1845,101 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, return -EFAULT; } EXPORT_SYMBOL_GPL(iov_iter_extract_pages); + +static unsigned int get_contig_folio_len(struct page **pages, + unsigned int *num_pages, size_t left, size_t offset) +{ + struct folio *folio = page_folio(pages[0]); + size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, left); + unsigned int max_pages, i; + size_t folio_offset, len; + + folio_offset = PAGE_SIZE * folio_page_idx(folio, pages[0]) + offset; + len = min(folio_size(folio) - folio_offset, left); + + /* + * We might COW a single page in the middle of a large folio, so we have + * to check that all pages belong to the same folio. 
+ */ + left -= contig_sz; + max_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); + for (i = 1; i < max_pages; i++) { + size_t next = min_t(size_t, PAGE_SIZE, left); + + if (page_folio(pages[i]) != folio || + pages[i] != pages[i - 1] + 1) + break; + contig_sz += next; + left -= next; + } + + *num_pages = i; + return contig_sz; +} + +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + +/** + * iov_iter_extract_bvecs - Extract bvecs from an iterator + * @iter: the iterator to extract from + * @bv: bvec return array + * @max_size: maximum size to extract from @iter + * @nr_vecs: number of vectors in @bv (on in and output) + * @max_vecs: maximum vectors in @bv, including those filled before calling + * @extraction_flags: flags to qualify request + * + * Like iov_iter_extract_pages(), but returns physically contiguous ranges + * contained in a single folio as a single bvec instead of multiple entries. + * + * Returns the number of bytes extracted when successful, or a negative errno. + * If @nr_vecs was non-zero on entry, the number of successfully extracted bytes + * can be 0. + */ +ssize_t iov_iter_extract_bvecs(struct iov_iter *iter, struct bio_vec *bv, + size_t max_size, unsigned short *nr_vecs, + unsigned short max_vecs, iov_iter_extraction_t extraction_flags) +{ + unsigned short entries_left = max_vecs - *nr_vecs; + unsigned short nr_pages, i = 0; + size_t left, offset, len; + struct page **pages; + ssize_t size; + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); + pages = (struct page **)(bv + *nr_vecs) + + entries_left * (PAGE_PTRS_PER_BVEC - 1); + + size = iov_iter_extract_pages(iter, &pages, max_size, entries_left, + extraction_flags, &offset); + if (unlikely(size <= 0)) + return size ? 
size : -EFAULT; + + nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE); + for (left = size; left > 0; left -= len) { + unsigned int nr_to_add; + + if (*nr_vecs > 0 && + !zone_device_pages_have_same_pgmap(bv[*nr_vecs - 1].bv_page, + pages[i])) + break; + + len = get_contig_folio_len(&pages[i], &nr_to_add, left, offset); + bvec_set_page(&bv[*nr_vecs], pages[i], len, offset); + i += nr_to_add; + (*nr_vecs)++; + offset = 0; + } + + iov_iter_revert(iter, left); + if (iov_iter_extract_will_pin(iter)) { + while (i < nr_pages) + unpin_user_page(pages[i++]); + } + return size - left; +} +EXPORT_SYMBOL_GPL(iov_iter_extract_bvecs); -- cgit v1.2.3 From 8dd5e7c75d7bb2635c7efd219ff20693fc24096a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jan 2026 06:53:37 +0100 Subject: block: add helpers to bounce buffer an iov_iter into bios Add helpers to implement bounce buffering of data into a bio to implement direct I/O for cases where direct user access is not possible because stable in-flight data is required. These are intended to be used as easily as bio_iov_iter_get_pages for the zero-copy path. The write side is trivial and just copies data into the bounce buffer. The read side is a lot more complex because it needs to perform the copy from the completion context, and without preserving the iov_iter through the call chain. It steals a trick from the integrity data user interface and uses the first vector in the bio for the bounce buffer data that is fed to the block I/O stack, and uses the others to record the user buffer fragments. Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Tested-by: Anuj Gupta Reviewed-by: Martin K. Petersen Reviewed-by: Darrick J. 
Wong Signed-off-by: Jens Axboe --- block/bio.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 26 ++++++++ 2 files changed, 205 insertions(+) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 285b573ae82f..49f7548a31d6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1266,6 +1266,185 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter, return bio_iov_iter_align_down(bio, iter, len_align_mask); } +static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size) +{ + struct folio *folio; + + while (*size > PAGE_SIZE) { + folio = folio_alloc(gfp | __GFP_NORETRY, get_order(*size)); + if (folio) + return folio; + *size = rounddown_pow_of_two(*size - 1); + } + + return folio_alloc(gfp, get_order(*size)); +} + +static void bio_free_folios(struct bio *bio) +{ + struct bio_vec *bv; + int i; + + bio_for_each_bvec_all(bv, bio, i) { + struct folio *folio = page_folio(bv->bv_page); + + if (!is_zero_folio(folio)) + folio_put(folio); + } +} + +static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter) +{ + size_t total_len = iov_iter_count(iter); + + if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) + return -EINVAL; + if (WARN_ON_ONCE(bio->bi_iter.bi_size)) + return -EINVAL; + if (WARN_ON_ONCE(bio->bi_vcnt >= bio->bi_max_vecs)) + return -EINVAL; + + do { + size_t this_len = min(total_len, SZ_1M); + struct folio *folio; + + if (this_len > PAGE_SIZE * 2) + this_len = rounddown_pow_of_two(this_len); + + if (bio->bi_iter.bi_size > BIO_MAX_SIZE - this_len) + break; + + folio = folio_alloc_greedy(GFP_KERNEL, &this_len); + if (!folio) + break; + bio_add_folio_nofail(bio, folio, this_len, 0); + + if (copy_from_iter(folio_address(folio), this_len, iter) != + this_len) { + bio_free_folios(bio); + return -EFAULT; + } + + total_len -= this_len; + } while (total_len && bio->bi_vcnt < bio->bi_max_vecs); + + if (!bio->bi_iter.bi_size) + return -ENOMEM; + return 0; +} + +static int 
bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter) +{ + size_t len = min(iov_iter_count(iter), SZ_1M); + struct folio *folio; + + folio = folio_alloc_greedy(GFP_KERNEL, &len); + if (!folio) + return -ENOMEM; + + do { + ssize_t ret; + + ret = iov_iter_extract_bvecs(iter, bio->bi_io_vec + 1, len, + &bio->bi_vcnt, bio->bi_max_vecs - 1, 0); + if (ret <= 0) { + if (!bio->bi_vcnt) + return ret; + break; + } + len -= ret; + bio->bi_iter.bi_size += ret; + } while (len && bio->bi_vcnt < bio->bi_max_vecs - 1); + + /* + * Set the folio directly here. The above loop has already calculated + * the correct bi_size, and we use bi_vcnt for the user buffers. That + * is safe as bi_vcnt is only used by the submitter and not the actual + * I/O path. + */ + bvec_set_folio(&bio->bi_io_vec[0], folio, bio->bi_iter.bi_size, 0); + if (iov_iter_extract_will_pin(iter)) + bio_set_flag(bio, BIO_PAGE_PINNED); + return 0; +} + +/** + * bio_iov_iter_bounce - bounce buffer data from an iter into a bio + * @bio: bio to send + * @iter: iter to read from / write into + * + * Helper for direct I/O implementations that need to bounce buffer because + * we need to checksum the data or perform other operations that require + * consistency. Allocates folios to back the bounce buffer, and for writes + * copies the data into it. Needs to be paired with bio_iov_iter_unbounce() + * called on completion. 
+ */ +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter) +{ + if (op_is_write(bio_op(bio))) + return bio_iov_iter_bounce_write(bio, iter); + return bio_iov_iter_bounce_read(bio, iter); +} + +static void bvec_unpin(struct bio_vec *bv, bool mark_dirty) +{ + struct folio *folio = page_folio(bv->bv_page); + size_t nr_pages = (bv->bv_offset + bv->bv_len - 1) / PAGE_SIZE - + bv->bv_offset / PAGE_SIZE + 1; + + if (mark_dirty) + folio_mark_dirty_lock(folio); + unpin_user_folio(folio, nr_pages); +} + +static void bio_iov_iter_unbounce_read(struct bio *bio, bool is_error, + bool mark_dirty) +{ + unsigned int len = bio->bi_io_vec[0].bv_len; + + if (likely(!is_error)) { + void *buf = bvec_virt(&bio->bi_io_vec[0]); + struct iov_iter to; + + iov_iter_bvec(&to, ITER_DEST, bio->bi_io_vec + 1, bio->bi_vcnt, + len); + /* copying to pinned pages should always work */ + WARN_ON_ONCE(copy_to_iter(buf, len, &to) != len); + } else { + /* No need to mark folios dirty if never copied to them */ + mark_dirty = false; + } + + if (bio_flagged(bio, BIO_PAGE_PINNED)) { + int i; + + for (i = 0; i < bio->bi_vcnt; i++) + bvec_unpin(&bio->bi_io_vec[1 + i], mark_dirty); + } + + folio_put(page_folio(bio->bi_io_vec[0].bv_page)); +} + +/** + * bio_iov_iter_unbounce - finish a bounce buffer operation + * @bio: completed bio + * @is_error: %true if an I/O error occurred and data should not be copied + * @mark_dirty: If %true, folios will be marked dirty. + * + * Helper for direct I/O implementations that need to bounce buffer because + * we need to checksum the data or perform other operations that require + * consistency. Called to complete a bio set up by bio_iov_iter_bounce(). + * Copies data back for reads, and marks the original folios dirty if + * requested and then frees the bounce buffer. 
+ */ +void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty) +{ + if (op_is_write(bio_op(bio))) + bio_free_folios(bio); + else + bio_iov_iter_unbounce_read(bio, is_error, mark_dirty); +} + static void submit_bio_wait_endio(struct bio *bio) { complete(bio->bi_private); diff --git a/include/linux/bio.h b/include/linux/bio.h index d32aee2857a9..69d56b1d1bd2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -397,6 +397,29 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) return iov_iter_npages(iter, max_segs); } +/** + * bio_iov_bounce_nr_vecs - calculate number of bvecs for a bounce bio + * @iter: iter to bounce from + * @op: REQ_OP_* for the bio + * + * Calculates how many bvecs are needed for the next bio to bounce from/to + * @iter. + */ +static inline unsigned short +bio_iov_bounce_nr_vecs(struct iov_iter *iter, blk_opf_t op) +{ + /* + * We still need to bounce bvec iters, so don't special case them + * here unlike in bio_iov_vecs_to_alloc. + * + * For reads we need to use a vector for the bounce buffer, account + * for that here. 
+ */ + if (op_is_write(op)) + return iov_iter_npages(iter, BIO_MAX_VECS); + return iov_iter_npages(iter, BIO_MAX_VECS - 1) + 1; +} + struct request_queue; void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, @@ -450,6 +473,9 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); +int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter); +void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty); + extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); -- cgit v1.2.3 From c9d114846b380fec1093b7bca91ee5a8cd7b575d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 26 Jan 2026 06:53:45 +0100 Subject: iomap: add a flag to bounce buffer direct I/O Add a new flag that requests bounce buffering for direct I/O. This is needed to provide the stable pages requirement requested by devices that need to calculate checksums or parity over the data and allows file systems to properly work with things like T10 protection information. The implementation just calls out to the new bio bounce buffering helpers to allocate a bounce buffer, which is used for I/O and to copy to/from it. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Darrick J. 
Wong Tested-by: Anuj Gupta Signed-off-by: Jens Axboe --- fs/iomap/direct-io.c | 30 ++++++++++++++++++++---------- include/linux/iomap.h | 9 +++++++++ 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index eca7adda595a..9c572de0d596 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -215,7 +215,11 @@ static void __iomap_dio_bio_end_io(struct bio *bio, bool inline_completion) { struct iomap_dio *dio = bio->bi_private; - if (dio->flags & IOMAP_DIO_USER_BACKED) { + if (dio->flags & IOMAP_DIO_BOUNCE) { + bio_iov_iter_unbounce(bio, !!dio->error, + dio->flags & IOMAP_DIO_USER_BACKED); + bio_put(bio); + } else if (dio->flags & IOMAP_DIO_USER_BACKED) { bio_check_pages_dirty(bio); } else { bio_release_pages(bio, false); @@ -303,12 +307,16 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter, struct iomap_dio *dio, loff_t pos, unsigned int alignment, blk_opf_t op) { + unsigned int nr_vecs; struct bio *bio; ssize_t ret; - bio = iomap_dio_alloc_bio(iter, dio, - bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS), - op); + if (dio->flags & IOMAP_DIO_BOUNCE) + nr_vecs = bio_iov_bounce_nr_vecs(dio->submit.iter, op); + else + nr_vecs = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); + + bio = iomap_dio_alloc_bio(iter, dio, nr_vecs, op); fscrypt_set_bio_crypt_ctx(bio, iter->inode, pos >> iter->inode->i_blkbits, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); @@ -317,7 +325,11 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; - ret = bio_iov_iter_get_pages(bio, dio->submit.iter, alignment - 1); + if (dio->flags & IOMAP_DIO_BOUNCE) + ret = bio_iov_iter_bounce(bio, dio->submit.iter); + else + ret = bio_iov_iter_get_pages(bio, dio->submit.iter, + alignment - 1); if (unlikely(ret)) goto out_put_bio; ret = bio->bi_iter.bi_size; @@ -333,7 +345,8 @@ static ssize_t 
iomap_dio_bio_iter_one(struct iomap_iter *iter, if (dio->flags & IOMAP_DIO_WRITE) task_io_account_write(ret); - else if (dio->flags & IOMAP_DIO_USER_BACKED) + else if ((dio->flags & IOMAP_DIO_USER_BACKED) && + !(dio->flags & IOMAP_DIO_BOUNCE)) bio_set_pages_dirty(bio); /* @@ -662,7 +675,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->i_size = i_size_read(inode); dio->dops = dops; dio->error = 0; - dio->flags = 0; + dio->flags = dio_flags & (IOMAP_DIO_FSBLOCK_ALIGNED | IOMAP_DIO_BOUNCE); dio->done_before = done_before; dio->submit.iter = iter; @@ -671,9 +684,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (iocb->ki_flags & IOCB_NOWAIT) iomi.flags |= IOMAP_NOWAIT; - if (dio_flags & IOMAP_DIO_FSBLOCK_ALIGNED) - dio->flags |= IOMAP_DIO_FSBLOCK_ALIGNED; - if (iov_iter_rw(iter) == READ) { if (iomi.pos >= dio->i_size) goto out_free_dio; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 520e967cb501..cf152f638665 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -562,6 +562,15 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_FSBLOCK_ALIGNED (1 << 3) +/* + * Bounce buffer instead of using zero copy access. + * + * This is needed if the device needs stable data to checksum or generate + * parity. The file system must hook into the I/O submission and offload + * completions to user context for reads when this is set. + */ +#define IOMAP_DIO_BOUNCE (1 << 4) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); -- cgit v1.2.3