summaryrefslogtreecommitdiff
path: root/block/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/bio.c')
-rw-r--r-- block/bio.c 180
1 files changed, 140 insertions, 40 deletions
diff --git a/block/bio.c b/block/bio.c
index 4be592d37fb6..3b371a5da159 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
+ bio->bi_write_stream = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
@@ -652,13 +653,13 @@ static void bio_truncate(struct bio *bio, unsigned new_size)
bio_for_each_segment(bv, bio, iter) {
if (done + bv.bv_len > new_size) {
- unsigned offset;
+ size_t offset;
if (!truncated)
offset = new_size - done;
else
offset = 0;
- zero_user(bv.bv_page, bv.bv_offset + offset,
+ memzero_page(bv.bv_page, bv.bv_offset + offset,
bv.bv_len - offset);
truncated = true;
}
@@ -827,6 +828,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_write_stream = bio_src->bi_write_stream;
bio->bi_iter = bio_src->bi_iter;
if (bio->bi_bdev) {
@@ -918,7 +920,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
}
static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
- unsigned int len, unsigned int off, bool *same_page)
+ unsigned int len, unsigned int off)
{
size_t bv_end = bv->bv_offset + bv->bv_len;
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv_end - 1;
@@ -928,12 +930,8 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
return false;
if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
return false;
- if (!zone_device_pages_have_same_pgmap(bv->bv_page, page))
- return false;
- *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) &
- PAGE_MASK));
- if (!*same_page) {
+ if ((vec_end_addr & PAGE_MASK) != ((page_addr + off) & PAGE_MASK)) {
if (IS_ENABLED(CONFIG_KMSAN))
return false;
if (bv->bv_page + bv_end / PAGE_SIZE != page + off / PAGE_SIZE)
@@ -953,8 +951,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
* helpers to split. Hopefully this will go away soon.
*/
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
- struct page *page, unsigned len, unsigned offset,
- bool *same_page)
+ struct page *page, unsigned len, unsigned offset)
{
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = bvec_phys(bv);
@@ -964,7 +961,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
return false;
if (len > queue_max_segment_size(q) - bv->bv_len)
return false;
- return bvec_try_merge_page(bv, page, len, offset, same_page);
+ return bvec_try_merge_page(bv, page, len, offset);
}
/**
@@ -983,6 +980,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio_full(bio, len));
+ if (is_pci_p2pdma_page(page))
+ bio->bi_opf |= REQ_P2PDMA | REQ_NOMERGE;
+
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
@@ -990,6 +990,22 @@ void __bio_add_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL_GPL(__bio_add_page);
/**
+ * bio_add_virt_nofail - add data in the direct kernel mapping to a bio
+ * @bio: destination bio
+ * @vaddr: data to add
+ * @len: length of the data to add, may cross pages
+ *
+ * Add the data at @vaddr to @bio. The caller must have ensured a segment
+ * is available for the added data. No merging into an existing segment
+ * will be performed.
+ */
+void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len)
+{
+ __bio_add_page(bio, virt_to_page(vaddr), len, offset_in_page(vaddr));
+}
+EXPORT_SYMBOL_GPL(bio_add_virt_nofail);
+
+/**
* bio_add_page - attempt to add page(s) to bio
* @bio: destination bio
* @page: start page to add
@@ -1002,18 +1018,21 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- bool same_page = false;
-
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
if (bio->bi_iter.bi_size > UINT_MAX - len)
return 0;
- if (bio->bi_vcnt > 0 &&
- bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- page, len, offset, &same_page)) {
- bio->bi_iter.bi_size += len;
- return len;
+ if (bio->bi_vcnt > 0) {
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+ if (!zone_device_pages_have_same_pgmap(bv->bv_page, page))
+ return 0;
+
+ if (bvec_try_merge_page(bv, page, len, offset)) {
+ bio->bi_iter.bi_size += len;
+ return len;
+ }
}
if (bio->bi_vcnt >= bio->bi_max_vecs)
@@ -1058,6 +1077,61 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
}
EXPORT_SYMBOL(bio_add_folio);
+/**
+ * bio_add_vmalloc_chunk - add a vmalloc chunk to a bio
+ * @bio: destination bio
+ * @vaddr: vmalloc address to add
+ * @len: total length in bytes of the data to add
+ *
+ * Add data starting at @vaddr to @bio and return how many bytes were added.
+ * This may be less than the amount originally asked for. Returns 0 if no data
+ * could be added to @bio.
+ *
+ * This helper calls flush_kernel_vmap_range() for the range added. For reads
+ * the caller still needs to manually call invalidate_kernel_vmap_range() in
+ * the completion handler.
+ */
+unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len)
+{
+ unsigned int offset = offset_in_page(vaddr);
+
+ len = min(len, PAGE_SIZE - offset);
+ if (bio_add_page(bio, vmalloc_to_page(vaddr), len, offset) < len)
+ return 0;
+ if (op_is_write(bio_op(bio)))
+ flush_kernel_vmap_range(vaddr, len);
+ return len;
+}
+EXPORT_SYMBOL_GPL(bio_add_vmalloc_chunk);
+
+/**
+ * bio_add_vmalloc - add a vmalloc region to a bio
+ * @bio: destination bio
+ * @vaddr: vmalloc address to add
+ * @len: total length in bytes of the data to add
+ *
+ * Add data starting at @vaddr to @bio. Return %true on success or %false if
+ * @bio does not have enough space for the payload.
+ *
+ * This helper calls flush_kernel_vmap_range() for the range added. For reads
+ * the caller still needs to manually call invalidate_kernel_vmap_range() in
+ * the completion handler.
+ */
+bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len)
+{
+ do {
+ unsigned int added = bio_add_vmalloc_chunk(bio, vaddr, len);
+
+ if (!added)
+ return false;
+ vaddr += added;
+ len -= added;
+ } while (len);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(bio_add_vmalloc);
+
void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct folio_iter fi;
@@ -1088,27 +1162,6 @@ void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter)
bio_set_flag(bio, BIO_CLONED);
}
-static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len,
- size_t offset)
-{
- bool same_page = false;
-
- if (WARN_ON_ONCE(bio->bi_iter.bi_size > UINT_MAX - len))
- return -EIO;
-
- if (bio->bi_vcnt > 0 &&
- bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- folio_page(folio, 0), len, offset,
- &same_page)) {
- bio->bi_iter.bi_size += len;
- if (same_page && bio_flagged(bio, BIO_PAGE_PINNED))
- unpin_user_folio(folio, 1);
- return 0;
- }
- bio_add_folio_nofail(bio, folio, len, offset);
- return 0;
-}
-
static unsigned int get_contig_folio_len(unsigned int *num_pages,
struct page **pages, unsigned int i,
struct folio *folio, size_t left,
@@ -1203,6 +1256,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
for (left = size, i = 0; left > 0; left -= len, i += num_pages) {
struct page *page = pages[i];
struct folio *folio = page_folio(page);
+ unsigned int old_vcnt = bio->bi_vcnt;
folio_offset = ((size_t)folio_page_idx(folio, page) <<
PAGE_SHIFT) + offset;
@@ -1215,7 +1269,23 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
len = get_contig_folio_len(&num_pages, pages, i,
folio, left, offset);
- bio_iov_add_folio(bio, folio, len, folio_offset);
+ if (!bio_add_folio(bio, folio, len, folio_offset)) {
+ WARN_ON_ONCE(1);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (bio_flagged(bio, BIO_PAGE_PINNED)) {
+ /*
+ * We're adding another fragment of a page that already
+ * was part of the last segment. Undo our pin as the
+ * page was pinned when an earlier fragment of it was
+ * added to the bio and __bio_release_pages expects a
+ * single pin per page.
+ */
+ if (offset && bio->bi_vcnt == old_vcnt)
+ unpin_user_folio(folio, 1);
+ }
offset = 0;
}
@@ -1301,6 +1371,36 @@ int submit_bio_wait(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio_wait);
+/**
+ * bdev_rw_virt - synchronously read into / write from kernel mapping
+ * @bdev: block device to access
+ * @sector: sector to access
+ * @data: data to read/write
+ * @len: length in bytes to read/write
+ * @op: operation (e.g. REQ_OP_READ/REQ_OP_WRITE)
+ *
+ * Performs synchronous I/O to @bdev for @data/@len. @data must be in
+ * the kernel direct mapping and not a vmalloc address.
+ */
+int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data,
+ size_t len, enum req_op op)
+{
+ struct bio_vec bv;
+ struct bio bio;
+ int error;
+
+ if (WARN_ON_ONCE(is_vmalloc_addr(data)))
+ return -EIO;
+
+ bio_init(&bio, bdev, &bv, 1, op);
+ bio.bi_iter.bi_sector = sector;
+ bio_add_virt_nofail(&bio, data, len);
+ error = submit_bio_wait(&bio);
+ bio_uninit(&bio);
+ return error;
+}
+EXPORT_SYMBOL_GPL(bdev_rw_virt);
+
static void bio_wait_end_io(struct bio *bio)
{
complete(bio->bi_private);