diff options
| author | Ming Lei <ming.lei@redhat.com> | 2026-03-31 18:31:55 +0300 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-04-07 16:40:19 +0300 |
| commit | 8a34e88769f617dc980edb5a0079e347bd1b9a89 (patch) | |
| tree | 601231c64a8915fd4d9e91aae150e3c453fc414a | |
| parent | 08677040a91199175149d1fd465c02e3b3fc768a (diff) | |
| download | linux-8a34e88769f617dc980edb5a0079e347bd1b9a89.tar.xz | |
ublk: eliminate permanent pages[] array from struct ublk_buf
The pages[] array (kvmalloc'd, 8 bytes per page = 2MB for a 1GB buffer)
was stored permanently in struct ublk_buf but only needed during
pin_user_pages_fast() and maple tree construction. Since the maple tree
already stores PFN ranges via ublk_buf_range, struct page pointers can
be recovered via pfn_to_page() during unregistration.
Make pages[] a temporary allocation in ublk_ctrl_reg_buf(), freed
immediately after the maple tree is built. Rewrite __ublk_ctrl_unreg_buf()
to iterate the maple tree for matching buf_index entries, recovering
struct page pointers via pfn_to_page() and unpinning in batches of 32.
Simplify ublk_buf_erase_ranges() to iterate the maple tree by buf_index
instead of walking the now-removed pages[] array.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://patch.msgid.link/20260331153207.3635125-5-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
| -rw-r--r-- | drivers/block/ublk_drv.c | 87 |
1 file changed, 55 insertions, 32 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bdb1de41d526..1af42850f5b1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -296,7 +296,6 @@ struct ublk_queue { /* Per-registered shared memory buffer */ struct ublk_buf { - struct page **pages; unsigned int nr_pages; }; @@ -5261,27 +5260,25 @@ static void ublk_unquiesce_and_resume(struct gendisk *disk) blk_mq_unquiesce_queue(disk->queue); } -/* Erase coalesced PFN ranges from the maple tree for pages [0, nr_pages) */ -static void ublk_buf_erase_ranges(struct ublk_device *ub, - struct ublk_buf *ubuf, - unsigned long nr_pages) +/* Erase coalesced PFN ranges from the maple tree matching buf_index */ +static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index) { - unsigned long i; - - for (i = 0; i < nr_pages; ) { - unsigned long pfn = page_to_pfn(ubuf->pages[i]); - unsigned long start = i; + MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); + struct ublk_buf_range *range; - while (i + 1 < nr_pages && - page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1) - i++; - i++; - kfree(mtree_erase(&ub->buf_tree, pfn)); + mas_lock(&mas); + mas_for_each(&mas, range, ULONG_MAX) { + if (range->buf_index == buf_index) { + mas_erase(&mas); + kfree(range); + } } + mas_unlock(&mas); } static int __ublk_ctrl_reg_buf(struct ublk_device *ub, - struct ublk_buf *ubuf, int index, + struct ublk_buf *ubuf, + struct page **pages, int index, unsigned short flags) { unsigned long nr_pages = ubuf->nr_pages; @@ -5289,13 +5286,13 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub, int ret; for (i = 0; i < nr_pages; ) { - unsigned long pfn = page_to_pfn(ubuf->pages[i]); + unsigned long pfn = page_to_pfn(pages[i]); unsigned long start = i; struct ublk_buf_range *range; /* Find run of consecutive PFNs */ while (i + 1 < nr_pages && - page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1) + page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1) i++; i++; /* past the last page in this run */ 
@@ -5320,7 +5317,7 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub, return 0; unwind: - ublk_buf_erase_ranges(ub, ubuf, i); + ublk_buf_erase_ranges(ub, index); return ret; } @@ -5335,6 +5332,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_shmem_buf_reg buf_reg; unsigned long addr, size, nr_pages; + struct page **pages = NULL; unsigned int gup_flags; struct gendisk *disk; struct ublk_buf *ubuf; @@ -5371,9 +5369,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, goto put_disk; } - ubuf->pages = kvmalloc_array(nr_pages, sizeof(*ubuf->pages), - GFP_KERNEL); - if (!ubuf->pages) { + pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) { ret = -ENOMEM; goto err_free; } @@ -5382,7 +5379,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY)) gup_flags |= FOLL_WRITE; - pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, ubuf->pages); + pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, pages); if (pinned < 0) { ret = pinned; goto err_free_pages; @@ -5406,7 +5403,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, if (ret) goto err_unlock; - ret = __ublk_ctrl_reg_buf(ub, ubuf, index, buf_reg.flags); + ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags); if (ret) { xa_erase(&ub->bufs_xa, index); goto err_unlock; @@ -5414,6 +5411,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, mutex_unlock(&ub->mutex); + kvfree(pages); ublk_unquiesce_and_resume(disk); ublk_put_disk(disk); return index; @@ -5422,9 +5420,9 @@ err_unlock: mutex_unlock(&ub->mutex); ublk_unquiesce_and_resume(disk); err_unpin: - unpin_user_pages(ubuf->pages, pinned); + unpin_user_pages(pages, pinned); err_free_pages: - kvfree(ubuf->pages); + kvfree(pages); err_free: kfree(ubuf); put_disk: @@ -5433,11 +5431,36 @@ put_disk: } static void __ublk_ctrl_unreg_buf(struct ublk_device *ub, - struct ublk_buf *ubuf) 
+ struct ublk_buf *ubuf, int buf_index) { - ublk_buf_erase_ranges(ub, ubuf, ubuf->nr_pages); - unpin_user_pages(ubuf->pages, ubuf->nr_pages); - kvfree(ubuf->pages); + MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); + struct ublk_buf_range *range; + struct page *pages[32]; + + mas_lock(&mas); + mas_for_each(&mas, range, ULONG_MAX) { + unsigned long base, nr, off; + + if (range->buf_index != buf_index) + continue; + + base = range->base_pfn; + nr = mas.last - mas.index + 1; + mas_erase(&mas); + + for (off = 0; off < nr; ) { + unsigned int batch = min_t(unsigned long, + nr - off, 32); + unsigned int j; + + for (j = 0; j < batch; j++) + pages[j] = pfn_to_page(base + off + j); + unpin_user_pages(pages, batch); + off += batch; + } + kfree(range); + } + mas_unlock(&mas); kfree(ubuf); } @@ -5468,7 +5491,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, return -ENOENT; } - __ublk_ctrl_unreg_buf(ub, ubuf); + __ublk_ctrl_unreg_buf(ub, ubuf, index); mutex_unlock(&ub->mutex); @@ -5483,7 +5506,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub) unsigned long index; xa_for_each(&ub->bufs_xa, index, ubuf) - __ublk_ctrl_unreg_buf(ub, ubuf); + __ublk_ctrl_unreg_buf(ub, ubuf, index); xa_destroy(&ub->bufs_xa); mtree_destroy(&ub->buf_tree); } |
