Diffstat (limited to 'drivers/block')
31 files changed, 815 insertions, 704 deletions
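The largest functional change in this set is DRBD dropping its hand-rolled drbd_pp_pool single-linked page list in favour of a standard page mempool, drbd_buffer_page_pool (see the drbd_main.c and drbd_receiver.c hunks further down). As a reading aid, here is a minimal kernel-style sketch of the allocate/release pattern the new code follows, condensed from the drbd_create_mempools() and __drbd_alloc_pages() hunks below. It is illustrative only: the mempool and page-chain calls are the ones used in the patch, the surrounding function and variable names are simplified stand-ins, and the snippet is not meant to build outside a kernel tree.

	#include <linux/mempool.h>
	#include <linux/mm.h>

	static mempool_t buffer_page_pool;	/* drbd names this drbd_buffer_page_pool */

	static int pool_init(unsigned int nr_pages)
	{
		/* pre-populate with order-0 pages so receive buffers can still be
		 * allocated when the page allocator refuses GFP_TRY requests */
		return mempool_init_page_pool(&buffer_page_pool, nr_pages, 0);
	}

	static struct page *alloc_buffer_chain(unsigned int number, gfp_t gfp)
	{
		struct page *page = NULL, *tmp;
		unsigned int i;

		for (i = 0; i < number; i++) {
			tmp = mempool_alloc(&buffer_page_pool, gfp);
			if (!tmp)
				goto fail;
			/* page->private links the chain, as in the drbd hunks */
			set_page_private(tmp, (unsigned long)page);
			page = tmp;
		}
		return page;
	fail:
		while (page) {
			tmp = (struct page *)page_private(page);
			set_page_private(page, 0);
			mempool_free(page, &buffer_page_pool);
			page = tmp;
		}
		return NULL;
	}

On the release side, the new drbd_free_pages() below returns a page to the mempool only while it holds the last reference (page_count() == 1) and otherwise just drops the reference with put_page(), which is what lets the patch delete the old net_ee deferred-reclaim machinery.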
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index df38fb364904..77d694448990 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -17,6 +17,7 @@ menuconfig BLK_DEV if BLK_DEV source "drivers/block/null_blk/Kconfig" +source "drivers/block/rnull/Kconfig" config BLK_DEV_FD tristate "Normal floppy disk support" @@ -311,15 +312,6 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. -config BLK_DEV_RUST_NULL - tristate "Rust null block driver (Experimental)" - depends on RUST - help - This is the Rust implementation of the null block driver. For now it - is only a minimal stub. - - If unsure, say N. - config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK diff --git a/drivers/block/Makefile b/drivers/block/Makefile index a695ce74ef22..2d8096eb8cdf 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -9,9 +9,6 @@ # needed for trace events ccflags-y += -I$(src) -obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o -rnull_mod-y := rnull.o - obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o @@ -38,6 +35,7 @@ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ +obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull/ obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o obj-$(CONFIG_BLK_DEV_ZONED_LOOP) += zloop.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 6357d86eafdc..2932b6653b6f 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1523,13 +1523,13 @@ static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = MINOR(bdev->bd_dev) & 3; + struct amiga_floppy_struct *p = disk->private_data; - geo->heads = unit[drive].type->heads; - geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; - geo->cylinders = unit[drive].type->tracks; + geo->heads = p->type->heads; + geo->sectors = p->dtype->sects * p->type->sect_mult; + geo->cylinders = p->type->tracks; return 0; } diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 00b74a845328..34ead75e7e02 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -269,9 +269,9 @@ static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx, } static int -aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +aoeblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct aoedev *d = bdev->bd_disk->private_data; + struct aoedev *d = disk->private_data; if ((d->flags & DEVFL_UP) == 0) { printk(KERN_ERR "aoe: disk not up\n"); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6298f8e271e3..a9affb7c264d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1761,6 +1761,6 @@ aoecmd_exit(void) kfree(kts); kfree(ktiowq); - free_page((unsigned long) page_address(empty_page)); + __free_page(empty_page); empty_page = NULL; } diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index cdf6e4041bb9..3b21750038ee 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -44,7 +44,7 @@ aoe_init(void) { int ret; - aoe_wq = alloc_workqueue("aoe_wq", 0, 0); + aoe_wq = alloc_workqueue("aoe_wq", WQ_PERCPU, 0); if (!aoe_wq) return -ENOMEM; diff --git a/drivers/block/brd.c 
b/drivers/block/brd.c index 0c2eabe14af3..9778259b30d4 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -44,45 +44,74 @@ struct brd_device { }; /* - * Look up and return a brd's page for a given sector. + * Look up and return a brd's page with reference grabbed for a given sector. */ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) { - return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); + struct page *page; + XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT); + + rcu_read_lock(); +repeat: + page = xas_load(&xas); + if (xas_retry(&xas, page)) { + xas_reset(&xas); + goto repeat; + } + + if (!page) + goto out; + + if (!get_page_unless_zero(page)) { + xas_reset(&xas); + goto repeat; + } + + if (unlikely(page != xas_reload(&xas))) { + put_page(page); + xas_reset(&xas); + goto repeat; + } +out: + rcu_read_unlock(); + + return page; } /* * Insert a new page for a given sector, if one does not already exist. + * The returned page will grab reference. */ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, blk_opf_t opf) - __releases(rcu) - __acquires(rcu) { gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO; struct page *page, *ret; - rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - if (!page) { - rcu_read_lock(); + if (!page) return ERR_PTR(-ENOMEM); - } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); - rcu_read_lock(); - if (ret) { + if (!ret) { + brd->brd_nr_pages++; + get_page(page); + xa_unlock(&brd->brd_pages); + return page; + } + + if (!xa_is_err(ret)) { + get_page(ret); xa_unlock(&brd->brd_pages); - __free_page(page); - if (xa_is_err(ret)) - return ERR_PTR(xa_err(ret)); + put_page(page); return ret; } - brd->brd_nr_pages++; + xa_unlock(&brd->brd_pages); - return page; + put_page(page); + return ERR_PTR(xa_err(ret)); } /* @@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd) pgoff_t idx; xa_for_each(&brd->brd_pages, idx, page) { - __free_page(page); + put_page(page); cond_resched(); } @@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset); - rcu_read_lock(); page = brd_lookup_page(brd, sector); if (!page && op_is_write(opf)) { page = brd_insert_page(brd, sector, opf); @@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio) memset(kaddr, 0, bv.bv_len); } kunmap_local(kaddr); - rcu_read_unlock(); bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len); + if (page) + put_page(page); return true; out_error: - rcu_read_unlock(); if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT)) bio_wouldblock_error(bio); else @@ -149,13 +177,6 @@ out_error: return false; } -static void brd_free_one_page(struct rcu_head *head) -{ - struct page *page = container_of(head, struct page, rcu_head); - - __free_page(page); -} - static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { sector_t aligned_sector = round_up(sector, PAGE_SECTORS); @@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { - call_rcu(&page->rcu_head, brd_free_one_page); + put_page(page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 
e21492981f7d..f6d6276974ee 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -380,6 +380,9 @@ enum { /* this is/was a write request */ __EE_WRITE, + /* hand back using mempool_free(e, drbd_buffer_page_pool) */ + __EE_RELEASE_TO_MEMPOOL, + /* this is/was a write same request */ __EE_WRITE_SAME, @@ -402,6 +405,7 @@ enum { #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) #define EE_SUBMITTED (1<<__EE_SUBMITTED) #define EE_WRITE (1<<__EE_WRITE) +#define EE_RELEASE_TO_MEMPOOL (1<<__EE_RELEASE_TO_MEMPOOL) #define EE_WRITE_SAME (1<<__EE_WRITE_SAME) #define EE_APPLICATION (1<<__EE_APPLICATION) #define EE_RS_THIN_REQ (1<<__EE_RS_THIN_REQ) @@ -858,7 +862,6 @@ struct drbd_device { struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ struct list_head done_ee; /* need to send P_WRITE_ACK */ struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ - struct list_head net_ee; /* zero-copy network send in progress */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ @@ -1329,24 +1332,6 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t drbd_request_mempool; extern mempool_t drbd_ee_mempool; -/* drbd's page pool, used to buffer data received from the peer, - * or data requested by the peer. - * - * This does not have an emergency reserve. - * - * When allocating from this pool, it first takes pages from the pool. - * Only if the pool is depleted will try to allocate from the system. - * - * The assumption is that pages taken from this pool will be processed, - * and given back, "quickly", and then can be recycled, so we can avoid - * frequent calls to alloc_page(), and still will be able to make progress even - * under memory pressure. - */ -extern struct page *drbd_pp_pool; -extern spinlock_t drbd_pp_lock; -extern int drbd_pp_vacant; -extern wait_queue_head_t drbd_pp_wait; - /* We also need a standard (emergency-reserve backed) page pool * for meta data IO (activity log, bitmap). * We can keep it global, as long as it is used as "N pages at a time". 
@@ -1354,6 +1339,7 @@ extern wait_queue_head_t drbd_pp_wait; */ #define DRBD_MIN_POOL_PAGES 128 extern mempool_t drbd_md_io_page_pool; +extern mempool_t drbd_buffer_page_pool; /* We also need to make sure we get a bio * when we need it for housekeeping purposes */ @@ -1488,10 +1474,7 @@ extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, sector_t, unsigned int, unsigned int, gfp_t) __must_hold(local); -extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, - int); -#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) -#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *req); extern struct page *drbd_alloc_pages(struct drbd_peer_device *, unsigned int, bool); extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed); extern int drbd_connected(struct drbd_peer_device *); @@ -1610,16 +1593,6 @@ static inline struct page *page_chain_next(struct page *page) for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) -{ - struct page *page = peer_req->pages; - page_chain_for_each(page) { - if (page_count(page) > 1) - return 1; - } - return 0; -} - static inline union drbd_state drbd_read_state(struct drbd_device *device) { struct drbd_resource *resource = device->resource; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 52724b79be30..c73376886e7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -114,20 +114,10 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t drbd_request_mempool; mempool_t drbd_ee_mempool; mempool_t drbd_md_io_page_pool; +mempool_t drbd_buffer_page_pool; struct bio_set drbd_md_io_bio_set; struct bio_set drbd_io_bio_set; -/* I do not use a standard mempool, because: - 1) I want to hand out the pre-allocated objects first. - 2) I want to be able to interrupt sleeping allocation with a signal. - Note: This is a single linked list, the next pointer is the private - member of struct page. - */ -struct page *drbd_pp_pool; -DEFINE_SPINLOCK(drbd_pp_lock); -int drbd_pp_vacant; -wait_queue_head_t drbd_pp_wait; - DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5); static const struct block_device_operations drbd_ops = { @@ -1611,6 +1601,7 @@ static int _drbd_send_zc_bio(struct drbd_peer_device *peer_device, struct bio *b static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, struct drbd_peer_request *peer_req) { + bool use_sendpage = !(peer_req->flags & EE_RELEASE_TO_MEMPOOL); struct page *page = peer_req->pages; unsigned len = peer_req->i.size; int err; @@ -1619,8 +1610,13 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device, page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - err = _drbd_send_page(peer_device, page, 0, l, - page_chain_next(page) ? MSG_MORE : 0); + if (likely(use_sendpage)) + err = _drbd_send_page(peer_device, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + else + err = _drbd_no_send_page(peer_device, page, 0, l, + page_chain_next(page) ? 
MSG_MORE : 0); + if (err) return err; len -= l; @@ -1962,7 +1958,6 @@ void drbd_init_set_defaults(struct drbd_device *device) INIT_LIST_HEAD(&device->sync_ee); INIT_LIST_HEAD(&device->done_ee); INIT_LIST_HEAD(&device->read_ee); - INIT_LIST_HEAD(&device->net_ee); INIT_LIST_HEAD(&device->resync_reads); INIT_LIST_HEAD(&device->resync_work.list); INIT_LIST_HEAD(&device->unplug_work.list); @@ -2043,7 +2038,6 @@ void drbd_device_cleanup(struct drbd_device *device) D_ASSERT(device, list_empty(&device->sync_ee)); D_ASSERT(device, list_empty(&device->done_ee)); D_ASSERT(device, list_empty(&device->read_ee)); - D_ASSERT(device, list_empty(&device->net_ee)); D_ASSERT(device, list_empty(&device->resync_reads)); D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); D_ASSERT(device, list_empty(&device->resync_work.list)); @@ -2055,19 +2049,11 @@ void drbd_device_cleanup(struct drbd_device *device) static void drbd_destroy_mempools(void) { - struct page *page; - - while (drbd_pp_pool) { - page = drbd_pp_pool; - drbd_pp_pool = (struct page *)page_private(page); - __free_page(page); - drbd_pp_vacant--; - } - /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ bioset_exit(&drbd_io_bio_set); bioset_exit(&drbd_md_io_bio_set); + mempool_exit(&drbd_buffer_page_pool); mempool_exit(&drbd_md_io_page_pool); mempool_exit(&drbd_ee_mempool); mempool_exit(&drbd_request_mempool); @@ -2086,9 +2072,8 @@ static void drbd_destroy_mempools(void) static int drbd_create_mempools(void) { - struct page *page; const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count; - int i, ret; + int ret; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2125,6 +2110,10 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; + ret = mempool_init_page_pool(&drbd_buffer_page_pool, number, 0); + if (ret) + goto Enomem; + ret = mempool_init_slab_pool(&drbd_request_mempool, number, drbd_request_cache); if (ret) @@ -2134,15 +2123,6 @@ static int drbd_create_mempools(void) if (ret) goto Enomem; - for (i = 0; i < number; i++) { - page = alloc_page(GFP_HIGHUSER); - if (!page) - goto Enomem; - set_page_private(page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = page; - } - drbd_pp_vacant = number; - return 0; Enomem: @@ -2169,10 +2149,6 @@ static void drbd_release_all_peer_reqs(struct drbd_device *device) rr = drbd_free_peer_reqs(device, &device->done_ee); if (rr) drbd_err(device, "%d EEs in done list found!\n", rr); - - rr = drbd_free_peer_reqs(device, &device->net_ee); - if (rr) - drbd_err(device, "%d EEs in net list found!\n", rr); } /* caution. no locking. 
*/ @@ -2863,11 +2839,6 @@ static int __init drbd_init(void) return err; } - /* - * allocate all necessary structs - */ - init_waitqueue_head(&drbd_pp_wait); - drbd_proc = NULL; /* play safe for drbd_cleanup */ idr_init(&drbd_devices); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e09930c2b226..91f3b8afb63c 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1330,6 +1330,7 @@ void drbd_reconsider_queue_parameters(struct drbd_device *device, lim.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS; else lim.max_write_zeroes_sectors = 0; + lim.max_hw_wzeroes_unmap_sectors = 0; if ((lim.discard_granularity >> SECTOR_SHIFT) > lim.max_hw_discard_sectors) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 975024cf03c5..caaf2781136d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/scatterlist.h> #include <linux/part_stat.h> +#include <linux/mempool.h> #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" @@ -63,182 +64,31 @@ static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -/* - * some helper functions to deal with single linked page lists, - * page->private being our "next" pointer. - */ - -/* If at least n pages are linked at head, get n pages off. - * Otherwise, don't modify head, and return NULL. - * Locking is the responsibility of the caller. - */ -static struct page *page_chain_del(struct page **head, int n) -{ - struct page *page; - struct page *tmp; - - BUG_ON(!n); - BUG_ON(!head); - - page = *head; - - if (!page) - return NULL; - - while (page) { - tmp = page_chain_next(page); - if (--n == 0) - break; /* found sufficient pages */ - if (tmp == NULL) - /* insufficient pages, don't use any of them. */ - return NULL; - page = tmp; - } - - /* add end of list marker for the returned list */ - set_page_private(page, 0); - /* actual return value, and adjustment of head */ - page = *head; - *head = tmp; - return page; -} - -/* may be used outside of locks to find the tail of a (usually short) - * "private" page chain, before adding it back to a global chain head - * with page_chain_add() under a spinlock. */ -static struct page *page_chain_tail(struct page *page, int *len) -{ - struct page *tmp; - int i = 1; - while ((tmp = page_chain_next(page))) { - ++i; - page = tmp; - } - if (len) - *len = i; - return page; -} - -static int page_chain_free(struct page *page) -{ - struct page *tmp; - int i = 0; - page_chain_for_each_safe(page, tmp) { - put_page(page); - ++i; - } - return i; -} - -static void page_chain_add(struct page **head, - struct page *chain_first, struct page *chain_last) -{ -#if 1 - struct page *tmp; - tmp = page_chain_tail(chain_first, NULL); - BUG_ON(tmp != chain_last); -#endif - - /* add chain to head */ - set_page_private(chain_last, (unsigned long)*head); - *head = chain_first; -} - -static struct page *__drbd_alloc_pages(struct drbd_device *device, - unsigned int number) +static struct page *__drbd_alloc_pages(unsigned int number) { struct page *page = NULL; struct page *tmp = NULL; unsigned int i = 0; - /* Yes, testing drbd_pp_vacant outside the lock is racy. - * So what. It saves a spin_lock. 
*/ - if (drbd_pp_vacant >= number) { - spin_lock(&drbd_pp_lock); - page = page_chain_del(&drbd_pp_pool, number); - if (page) - drbd_pp_vacant -= number; - spin_unlock(&drbd_pp_lock); - if (page) - return page; - } - /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ for (i = 0; i < number; i++) { - tmp = alloc_page(GFP_TRY); + tmp = mempool_alloc(&drbd_buffer_page_pool, GFP_TRY); if (!tmp) - break; + goto fail; set_page_private(tmp, (unsigned long)page); page = tmp; } - - if (i == number) - return page; - - /* Not enough pages immediately available this time. - * No need to jump around here, drbd_alloc_pages will retry this - * function "soon". */ - if (page) { - tmp = page_chain_tail(page, NULL); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); + return page; +fail: + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + mempool_free(page, &drbd_buffer_page_pool); } return NULL; } -static void reclaim_finished_net_peer_reqs(struct drbd_device *device, - struct list_head *to_be_freed) -{ - struct drbd_peer_request *peer_req, *tmp; - - /* The EEs are always appended to the end of the list. Since - they are sent in order over the wire, they have to finish - in order. As soon as we see the first not finished we can - stop to examine the list... */ - - list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) { - if (drbd_peer_req_has_active_page(peer_req)) - break; - list_move(&peer_req->w.list, to_be_freed); - } -} - -static void drbd_reclaim_net_peer_reqs(struct drbd_device *device) -{ - LIST_HEAD(reclaimed); - struct drbd_peer_request *peer_req, *t; - - spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); - spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); -} - -static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection) -{ - struct drbd_peer_device *peer_device; - int vnr; - - rcu_read_lock(); - idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { - struct drbd_device *device = peer_device->device; - if (!atomic_read(&device->pp_in_use_by_net)) - continue; - - kref_get(&device->kref); - rcu_read_unlock(); - drbd_reclaim_net_peer_reqs(device); - kref_put(&device->kref, drbd_destroy_device); - rcu_read_lock(); - } - rcu_read_unlock(); -} - /** * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @peer_device: DRBD device. @@ -263,9 +113,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int bool retry) { struct drbd_device *device = peer_device->device; - struct page *page = NULL; + struct page *page; struct net_conf *nc; - DEFINE_WAIT(wait); unsigned int mxb; rcu_read_lock(); @@ -273,37 +122,9 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int mxb = nc ? nc->max_buffers : 1000000; rcu_read_unlock(); - if (atomic_read(&device->pp_in_use) < mxb) - page = __drbd_alloc_pages(device, number); - - /* Try to keep the fast path fast, but occasionally we need - * to reclaim the pages we lended to the network stack. 
*/ - if (page && atomic_read(&device->pp_in_use_by_net) > 512) - drbd_reclaim_net_peer_reqs(device); - - while (page == NULL) { - prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - - drbd_reclaim_net_peer_reqs(device); - - if (atomic_read(&device->pp_in_use) < mxb) { - page = __drbd_alloc_pages(device, number); - if (page) - break; - } - - if (!retry) - break; - - if (signal_pending(current)) { - drbd_warn(device, "drbd_alloc_pages interrupted!\n"); - break; - } - - if (schedule_timeout(HZ/10) == 0) - mxb = UINT_MAX; - } - finish_wait(&drbd_pp_wait, &wait); + if (atomic_read(&device->pp_in_use) >= mxb) + schedule_timeout_interruptible(HZ / 10); + page = __drbd_alloc_pages(number); if (page) atomic_add(number, &device->pp_in_use); @@ -314,29 +135,25 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int * Is also used from inside an other spin_lock_irq(&resource->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net) +static void drbd_free_pages(struct drbd_device *device, struct page *page) { - atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use; - int i; + struct page *tmp; + int i = 0; if (page == NULL) return; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count) - i = page_chain_free(page); - else { - struct page *tmp; - tmp = page_chain_tail(page, &i); - spin_lock(&drbd_pp_lock); - page_chain_add(&drbd_pp_pool, page, tmp); - drbd_pp_vacant += i; - spin_unlock(&drbd_pp_lock); - } - i = atomic_sub_return(i, a); + page_chain_for_each_safe(page, tmp) { + set_page_private(page, 0); + if (page_count(page) == 1) + mempool_free(page, &drbd_buffer_page_pool); + else + put_page(page); + i++; + } + i = atomic_sub_return(i, &device->pp_in_use); if (i < 0) - drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n", - is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); - wake_up(&drbd_pp_wait); + drbd_warn(device, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); } /* @@ -380,6 +197,8 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto gfpflags_allow_blocking(gfp_mask)); if (!page) goto fail; + if (!mempool_is_saturated(&drbd_buffer_page_pool)) + peer_req->flags |= EE_RELEASE_TO_MEMPOOL; } memset(peer_req, 0, sizeof(*peer_req)); @@ -403,13 +222,12 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto return NULL; } -void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, - int is_net) +void drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req) { might_sleep(); if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); - drbd_free_pages(device, peer_req->pages, is_net); + drbd_free_pages(device, peer_req->pages); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (!expect(device, !(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { @@ -424,14 +242,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) LIST_HEAD(work_list); struct drbd_peer_request *peer_req, *t; int count = 0; - int is_net = list == &device->net_ee; spin_lock_irq(&device->resource->req_lock); list_splice_init(list, &work_list); spin_unlock_irq(&device->resource->req_lock); list_for_each_entry_safe(peer_req, t, &work_list, w.list) { - __drbd_free_peer_req(device, peer_req, is_net); + drbd_free_peer_req(device, peer_req); count++; } return count; @@ -443,18 +260,13 @@ int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list) static int drbd_finish_peer_reqs(struct drbd_device *device) { LIST_HEAD(work_list); - LIST_HEAD(reclaimed); struct drbd_peer_request *peer_req, *t; int err = 0; spin_lock_irq(&device->resource->req_lock); - reclaim_finished_net_peer_reqs(device, &reclaimed); list_splice_init(&device->done_ee, &work_list); spin_unlock_irq(&device->resource->req_lock); - list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) - drbd_free_net_peer_req(device, peer_req); - /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_superseded. * all ignore the last argument. @@ -1975,7 +1787,7 @@ static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size) data_size -= len; } kunmap(page); - drbd_free_pages(peer_device->device, page, 0); + drbd_free_pages(peer_device->device, page); return err; } @@ -5224,16 +5036,6 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) put_ldev(device); } - /* tcp_close and release of sendpage pages can be deferred. I don't - * want to use SO_LINGER, because apparently it can be deferred for - * more than 20 seconds (longest time I checked). - * - * Actually we don't care for exactly when the network stack does its - * put_page(), but release our reference on these pages right here. 
- */ - i = drbd_free_peer_reqs(device, &device->net_ee); - if (i) - drbd_info(device, "net_ee not empty, killed %u entries\n", i); i = atomic_read(&device->pp_in_use_by_net); if (i) drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i); @@ -5980,8 +5782,6 @@ int drbd_ack_receiver(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - conn_reclaim_net_peer_reqs(connection); - if (test_and_clear_bit(SEND_PING, &connection->flags)) { if (drbd_send_ping(connection)) { drbd_err(connection, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a6ea737b3b71..dea3e79d044f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1030,22 +1030,6 @@ out: return 1; } -/* helper */ -static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req) -{ - if (drbd_peer_req_has_active_page(peer_req)) { - /* This might happen if sendpage() has not finished */ - int i = PFN_UP(peer_req->i.size); - atomic_add(i, &device->pp_in_use_by_net); - atomic_sub(i, &device->pp_in_use); - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->net_ee); - spin_unlock_irq(&device->resource->req_lock); - wake_up(&drbd_pp_wait); - } else - drbd_free_peer_req(device, peer_req); -} - /** * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST * @w: work object. @@ -1059,9 +1043,8 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { @@ -1074,12 +1057,12 @@ int w_e_end_data_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req); } - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1120,9 +1103,8 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev_if_state(device, D_FAILED)) { @@ -1155,13 +1137,12 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) /* update resync data with failure */ drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } - - dec_unacked(device); - - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } @@ -1176,9 +1157,8 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) int err, eq = 0; if (unlikely(cancel)) { - drbd_free_peer_req(device, peer_req); - dec_unacked(device); - return 0; + err = 0; + goto out; } if (get_ldev(device)) { @@ -1220,12 +1200,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) if (drbd_ratelimit()) drbd_err(device, "Sending NegDReply. 
I guess it gets messy.\n"); } - - dec_unacked(device); - move_to_net_ee_or_free(device, peer_req); - if (unlikely(err)) drbd_err(device, "drbd_send_block/ack() failed\n"); +out: + dec_unacked(device); + drbd_free_peer_req(device, peer_req); + return err; } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 24be0c2c4075..5336c3c5ca36 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -163,35 +163,35 @@ /* do print messages for unexpected interrupts */ static int print_unex = 1; -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/workqueue.h> -#include <linux/fdreg.h> -#include <linux/fd.h> -#include <linux/hdreg.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/mm.h> +#include <linux/async.h> #include <linux/bio.h> -#include <linux/string.h> -#include <linux/jiffies.h> -#include <linux/fcntl.h> +#include <linux/compat.h> #include <linux/delay.h> -#include <linux/mc146818rtc.h> /* CMOS defines */ -#include <linux/ioport.h> -#include <linux/interrupt.h> +#include <linux/errno.h> +#include <linux/fcntl.h> +#include <linux/fd.h> +#include <linux/fdreg.h> +#include <linux/fs.h> +#include <linux/hdreg.h> #include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> #include <linux/major.h> -#include <linux/platform_device.h> +#include <linux/mc146818rtc.h> /* CMOS defines */ +#include <linux/mm.h> #include <linux/mod_devicetable.h> +#include <linux/module.h> #include <linux/mutex.h> -#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/timer.h> #include <linux/uaccess.h> -#include <linux/async.h> -#include <linux/compat.h> +#include <linux/workqueue.h> /* * PS/2 floppies have much slower step rates than regular floppies. @@ -233,8 +233,6 @@ static unsigned short virtual_dma_port = 0x3f0; irqreturn_t floppy_interrupt(int irq, void *dev_id); static int set_dor(int fdc, char mask, char data); -#define K_64 0x10000 /* 64KB */ - /* the following is the mask of allowed drives. By default units 2 and * 3 of both floppy controllers are disabled, because switching on the * motor of these drives causes system hangs on some PCI computers. 
drive @@ -3092,16 +3090,13 @@ static int raw_cmd_copyin(int cmd, void __user *param, *rcmd = NULL; loop: - ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL); - if (!ptr) - return -ENOMEM; + ptr = memdup_user(param, sizeof(*ptr)); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); *rcmd = ptr; - ret = copy_from_user(ptr, param, sizeof(*ptr)); ptr->next = NULL; ptr->buffer_length = 0; ptr->kernel_data = NULL; - if (ret) - return -EFAULT; param += sizeof(struct floppy_raw_cmd); if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE) return -EINVAL; @@ -3363,9 +3358,9 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) return 0; } -static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int fd_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - int drive = (long)bdev->bd_disk->private_data; + int drive = (long)disk->private_data; int type = ITYPE(drive_state[drive].fd_device); struct floppy_struct *g; int ret; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1b6ee91f8eb9..053a086d547e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -137,20 +137,35 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) +static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { loff_t loopsize; + int ret; + + if (S_ISBLK(file_inode(file)->i_mode)) { + loopsize = i_size_read(file->f_mapping->host); + } else { + struct kstat stat; + + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; - /* Compute loopsize in bytes */ - loopsize = i_size_read(file->f_mapping->host); - if (offset > 0) - loopsize -= offset; + loopsize = stat.size; + } + + if (lo->lo_offset > 0) + loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; - - if (sizelimit > 0 && sizelimit < loopsize) - loopsize = sizelimit; + if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) + loopsize = lo->lo_sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. @@ -158,11 +173,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) return loopsize >> 9; } -static loff_t get_loop_size(struct loop_device *lo, struct file *file) -{ - return get_size(lo->lo_offset, lo->lo_sizelimit, file); -} - /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of @@ -569,7 +579,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = -EINVAL; /* size of the new backing store needs to be the same */ - if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) + if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file)) goto out_err; /* @@ -1063,7 +1073,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, loop_update_dio(lo); loop_sysfs_init(lo); - size = get_loop_size(lo, file); + size = lo_calculate_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). 
*/ @@ -1255,8 +1265,7 @@ out_unfreeze: if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); if (!err && size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, new_size); } out_unlock: @@ -1399,7 +1408,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - size = get_loop_size(lo, lo->lo_backing_file); + size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 8fc7761397bd..567192e371a8 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3148,17 +3148,17 @@ static int mtip_block_compat_ioctl(struct block_device *dev, * that each partition is also 4KB aligned. Non-aligned partitions adversely * affects performance. * - * @dev Pointer to the block_device strucutre. + * @disk Pointer to the gendisk strucutre. * @geo Pointer to a hd_geometry structure. * * return value * 0 Operation completed successfully. * -ENOTTY An error occurred while reading the drive capacity. */ -static int mtip_block_getgeo(struct block_device *dev, +static int mtip_block_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct driver_data *dd = dev->bd_disk->private_data; + struct driver_data *dd = disk->private_data; sector_t capacity; if (!dd) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6463d0e8d0ce..1188f32a5e5e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -311,7 +311,7 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock, if (args) { INIT_WORK(&args->work, nbd_dead_link_work); args->index = nbd->index; - queue_work(system_wq, &args->work); + queue_work(system_percpu_wq, &args->work); } } if (!nsock->dead) { @@ -1217,6 +1217,14 @@ static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, if (!sock) return NULL; + if (!sk_is_tcp(sock->sk) && + !sk_is_stream_unix(sock->sk)) { + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: should be TCP or UNIX.\n"); + *err = -EINVAL; + sockfd_put(sock); + return NULL; + } + if (sock->ops->shutdown == sock_no_shutdown) { dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); *err = -EINVAL; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index aa163ae9b2aa..f982027e8c85 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -223,7 +223,7 @@ MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed nu static unsigned long g_cache_size; module_param_named(cache_size, g_cache_size, ulong, 0444); -MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)"); +MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. 
Default: 0 (none)"); static bool g_fua = true; module_param_named(fua, g_fua, bool, 0444); @@ -1179,7 +1179,7 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, memcpy_page(dest, off + count, t_page->page, offset, temp); else - zero_user(dest, off + count, temp); + memzero_page(dest, off + count, temp); count += temp; sector += temp >> SECTOR_SHIFT; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index faafd7ff43d6..af0e21149dbc 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7389,7 +7389,7 @@ static int __init rbd_init(void) * The number of active work items is limited by the number of * rbd devices * queue depth, so leave @max_active at default. */ - rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); + rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!rbd_wq) { rc = -ENOMEM; goto err_out_slab; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 15627417f12e..f1409e54010a 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -942,11 +942,11 @@ static void rnbd_client_release(struct gendisk *gen) rnbd_clt_put_dev(dev); } -static int rnbd_client_getgeo(struct block_device *block_device, +static int rnbd_client_getgeo(struct gendisk *disk, struct hd_geometry *geo) { u64 size; - struct rnbd_clt_dev *dev = block_device->bd_disk->private_data; + struct rnbd_clt_dev *dev = disk->private_data; struct queue_limits *limit = &dev->queue->limits; size = dev->size * (limit->logical_block_size / SECTOR_SIZE); @@ -1809,7 +1809,7 @@ static int __init rnbd_client_init(void) unregister_blkdev(rnbd_client_major, "rnbd"); return err; } - rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0); + rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", WQ_PERCPU, 0); if (!rnbd_clt_wq) { pr_err("Failed to load module, alloc_workqueue failed.\n"); rnbd_clt_destroy_sysfs_files(); diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs deleted file mode 100644 index d07e76ae2c13..000000000000 --- a/drivers/block/rnull.rs +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -//! This is a Rust implementation of the C null block driver. -//! -//! Supported features: -//! -//! - blk-mq interface -//! - direct completion -//! - block size 4k -//! -//! The driver is not configurable. - -use kernel::{ - alloc::flags, - block::mq::{ - self, - gen_disk::{self, GenDisk}, - Operations, TagSet, - }, - error::Result, - new_mutex, pr_info, - prelude::*, - sync::{Arc, Mutex}, - types::ARef, -}; - -module! { - type: NullBlkModule, - name: "rnull_mod", - authors: ["Andreas Hindborg"], - description: "Rust implementation of the C null block driver", - license: "GPL v2", -} - -#[pin_data] -struct NullBlkModule { - #[pin] - _disk: Mutex<GenDisk<NullBlkDevice>>, -} - -impl kernel::InPlaceModule for NullBlkModule { - fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { - pr_info!("Rust null_blk loaded\n"); - - // Use a immediately-called closure as a stable `try` block - let disk = /* try */ (|| { - let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; - - gen_disk::GenDiskBuilder::new() - .capacity_sectors(4096 << 11) - .logical_block_size(4096)? - .physical_block_size(4096)? 
- .rotational(false) - .build(format_args!("rnullb{}", 0), tagset) - })(); - - try_pin_init!(Self { - _disk <- new_mutex!(disk?, "nullb:disk"), - }) - } -} - -struct NullBlkDevice; - -#[vtable] -impl Operations for NullBlkDevice { - #[inline(always)] - fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result { - mq::Request::end_ok(rq) - .map_err(|_e| kernel::error::code::EIO) - // We take no refcounts on the request, so we expect to be able to - // end the request. The request reference must be unique at this - // point, and so `end_ok` cannot fail. - .expect("Fatal error - expected to be able to end request"); - - Ok(()) - } - - fn commit_rqs() {} -} diff --git a/drivers/block/rnull/Kconfig b/drivers/block/rnull/Kconfig new file mode 100644 index 000000000000..7bc5b376c128 --- /dev/null +++ b/drivers/block/rnull/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Rust null block device driver configuration + +config BLK_DEV_RUST_NULL + tristate "Rust null block driver (Experimental)" + depends on RUST && CONFIGFS_FS + help + This is the Rust implementation of the null block driver. Like + the C version, the driver allows the user to create virutal block + devices that can be configured via various configuration options. + + If unsure, say N. diff --git a/drivers/block/rnull/Makefile b/drivers/block/rnull/Makefile new file mode 100644 index 000000000000..11cfa5e615dc --- /dev/null +++ b/drivers/block/rnull/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o +rnull_mod-y := rnull.o diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs new file mode 100644 index 000000000000..8498e9bae6fd --- /dev/null +++ b/drivers/block/rnull/configfs.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 + +use super::{NullBlkDevice, THIS_MODULE}; +use core::fmt::{Display, Write}; +use kernel::{ + block::mq::gen_disk::{GenDisk, GenDiskBuilder}, + c_str, + configfs::{self, AttributeOperations}, + configfs_attrs, new_mutex, + page::PAGE_SIZE, + prelude::*, + str::{kstrtobool_bytes, CString}, + sync::Mutex, +}; +use pin_init::PinInit; + +pub(crate) fn subsystem() -> impl PinInit<kernel::configfs::Subsystem<Config>, Error> { + let item_type = configfs_attrs! { + container: configfs::Subsystem<Config>, + data: Config, + child: DeviceConfig, + attributes: [ + features: 0, + ], + }; + + kernel::configfs::Subsystem::new(c_str!("rnull"), item_type, try_pin_init!(Config {})) +} + +#[pin_data] +pub(crate) struct Config {} + +#[vtable] +impl AttributeOperations<0> for Config { + type Data = Config; + + fn show(_this: &Config, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_str("blocksize,size,rotational,irqmode\n")?; + Ok(writer.bytes_written()) + } +} + +#[vtable] +impl configfs::GroupOperations for Config { + type Child = DeviceConfig; + + fn make_group( + &self, + name: &CStr, + ) -> Result<impl PinInit<configfs::Group<DeviceConfig>, Error>> { + let item_type = configfs_attrs! 
{ + container: configfs::Group<DeviceConfig>, + data: DeviceConfig, + attributes: [ + // Named for compatibility with C null_blk + power: 0, + blocksize: 1, + rotational: 2, + size: 3, + irqmode: 4, + ], + }; + + Ok(configfs::Group::new( + name.try_into()?, + item_type, + // TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside + try_pin_init!( DeviceConfig { + data <- new_mutex!(DeviceConfigInner { + powered: false, + block_size: 4096, + rotational: false, + disk: None, + capacity_mib: 4096, + irq_mode: IRQMode::None, + name: name.try_into()?, + }), + }), + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) enum IRQMode { + None, + Soft, +} + +impl TryFrom<u8> for IRQMode { + type Error = kernel::error::Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 0 => Ok(Self::None), + 1 => Ok(Self::Soft), + _ => Err(EINVAL), + } + } +} + +impl Display for IRQMode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::None => f.write_str("0")?, + Self::Soft => f.write_str("1")?, + } + Ok(()) + } +} + +#[pin_data] +pub(crate) struct DeviceConfig { + #[pin] + data: Mutex<DeviceConfigInner>, +} + +#[pin_data] +struct DeviceConfigInner { + powered: bool, + name: CString, + block_size: u32, + rotational: bool, + capacity_mib: u64, + irq_mode: IRQMode, + disk: Option<GenDisk<NullBlkDevice>>, +} + +#[vtable] +impl configfs::AttributeOperations<0> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + + if this.data.lock().powered { + writer.write_str("1\n")?; + } else { + writer.write_str("0\n")?; + } + + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + let power_op = kstrtobool_bytes(page)?; + let mut guard = this.data.lock(); + + if !guard.powered && power_op { + guard.disk = Some(NullBlkDevice::new( + &guard.name, + guard.block_size, + guard.rotational, + guard.capacity_mib, + guard.irq_mode, + )?); + guard.powered = true; + } else if guard.powered && !power_op { + drop(guard.disk.take()); + guard.powered = false; + } + + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<1> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().block_size))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u32>().map_err(|_| EINVAL)?; + + GenDiskBuilder::validate_block_size(value)?; + this.data.lock().block_size = value; + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<2> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + + if this.data.lock().rotational { + writer.write_str("1\n")?; + } else { + writer.write_str("0\n")?; + } + + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + this.data.lock().rotational = kstrtobool_bytes(page)?; + + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<3> for DeviceConfig { + type Data = DeviceConfig; + + fn 
show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().capacity_mib))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u64>().map_err(|_| EINVAL)?; + + this.data.lock().capacity_mib = value; + Ok(()) + } +} + +#[vtable] +impl configfs::AttributeOperations<4> for DeviceConfig { + type Data = DeviceConfig; + + fn show(this: &DeviceConfig, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + let mut writer = kernel::str::Formatter::new(page); + writer.write_fmt(fmt!("{}\n", this.data.lock().irq_mode))?; + Ok(writer.bytes_written()) + } + + fn store(this: &DeviceConfig, page: &[u8]) -> Result { + if this.data.lock().powered { + return Err(EBUSY); + } + + let text = core::str::from_utf8(page)?.trim(); + let value = text.parse::<u8>().map_err(|_| EINVAL)?; + + this.data.lock().irq_mode = IRQMode::try_from(value)?; + Ok(()) + } +} diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs new file mode 100644 index 000000000000..1ec694d7f1a6 --- /dev/null +++ b/drivers/block/rnull/rnull.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This is a Rust implementation of the C null block driver. + +mod configfs; + +use configfs::IRQMode; +use kernel::{ + block::{ + self, + mq::{ + self, + gen_disk::{self, GenDisk}, + Operations, TagSet, + }, + }, + error::Result, + pr_info, + prelude::*, + sync::Arc, + types::ARef, +}; +use pin_init::PinInit; + +module! { + type: NullBlkModule, + name: "rnull_mod", + authors: ["Andreas Hindborg"], + description: "Rust implementation of the C null block driver", + license: "GPL v2", +} + +#[pin_data] +struct NullBlkModule { + #[pin] + configfs_subsystem: kernel::configfs::Subsystem<configfs::Config>, +} + +impl kernel::InPlaceModule for NullBlkModule { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Rust null_blk loaded\n"); + + try_pin_init!(Self { + configfs_subsystem <- configfs::subsystem(), + }) + } +} + +struct NullBlkDevice; + +impl NullBlkDevice { + fn new( + name: &CStr, + block_size: u32, + rotational: bool, + capacity_mib: u64, + irq_mode: IRQMode, + ) -> Result<GenDisk<Self>> { + let tagset = Arc::pin_init(TagSet::new(1, 256, 1), GFP_KERNEL)?; + + let queue_data = Box::new(QueueData { irq_mode }, GFP_KERNEL)?; + + gen_disk::GenDiskBuilder::new() + .capacity_sectors(capacity_mib << (20 - block::SECTOR_SHIFT)) + .logical_block_size(block_size)? + .physical_block_size(block_size)? + .rotational(rotational) + .build(fmt!("{}", name.to_str()?), tagset, queue_data) + } +} + +struct QueueData { + irq_mode: IRQMode, +} + +#[vtable] +impl Operations for NullBlkDevice { + type QueueData = KBox<QueueData>; + + #[inline(always)] + fn queue_rq(queue_data: &QueueData, rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result { + match queue_data.irq_mode { + IRQMode::None => mq::Request::end_ok(rq) + .map_err(|_e| kernel::error::code::EIO) + // We take no refcounts on the request, so we expect to be able to + // end the request. The request reference must be unique at this + // point, and so `end_ok` cannot fail. 
+ .expect("Fatal error - expected to be able to end request"), + IRQMode::Soft => mq::Request::complete(rq), + } + Ok(()) + } + + fn commit_rqs(_queue_data: &QueueData) {} + + fn complete(rq: ARef<mq::Request<Self>>) { + mq::Request::end_ok(rq) + .map_err(|_e| kernel::error::code::EIO) + // We take no refcounts on the request, so we expect to be able to + // end the request. The request reference must be unique at this + // point, and so `end_ok` cannot fail. + .expect("Fatal error - expected to be able to end request"); + } +} diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 7af21fe67671..db1fe9772a4d 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -119,9 +119,8 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) return vio_dring_avail(dr, VDC_TX_RING_SIZE); } -static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int vdc_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct gendisk *disk = bdev->bd_disk; sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; @@ -1189,7 +1188,7 @@ static void vdc_ldc_reset(struct vdc_port *port) } if (port->ldc_timeout) - mod_delayed_work(system_wq, &port->ldc_reset_timer_work, + mod_delayed_work(system_percpu_wq, &port->ldc_reset_timer_work, round_jiffies(jiffies + HZ * port->ldc_timeout)); mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ)); return; @@ -1217,7 +1216,7 @@ static int __init vdc_init(void) { int err; - sunvdc_wq = alloc_workqueue("sunvdc", 0, 0); + sunvdc_wq = alloc_workqueue("sunvdc", WQ_PERCPU, 0); if (!sunvdc_wq) return -ENOMEM; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index eda33c5eb5e2..416015947ae6 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -711,9 +711,9 @@ static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode, return -ENOTTY; } -static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) +static int floppy_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct floppy_state *fs = bdev->bd_disk->private_data; + struct floppy_state *fs = disk->private_data; struct floppy_struct *g; int ret; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6561d2a561fa..0c74a41a6753 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,7 +201,6 @@ struct ublk_queue { bool force_abort; bool canceling; bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */ - unsigned short nr_io_ready; /* how many ios setup */ spinlock_t cancel_lock; struct ublk_device *dev; struct ublk_io ios[]; @@ -234,11 +233,12 @@ struct ublk_device { struct ublk_params params; struct completion completion; - unsigned int nr_queues_ready; - unsigned int nr_privileged_daemon; + u32 nr_io_ready; + bool unprivileged_daemons; struct mutex cancel_mutex; bool canceling; pid_t ublksrv_tgid; + struct delayed_work exit_work; }; /* header of ublk_params */ @@ -251,8 +251,7 @@ static void ublk_io_release(void *priv); static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, struct ublk_io *io, - size_t offset); + u16 q_id, u16 tag, struct ublk_io *io, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc * @@ -531,7 +530,8 @@ static blk_status_t ublk_setup_iod_zoned(struct ublk_queue *ubq, #endif -static 
inline void __ublk_complete_rq(struct request *req); +static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, + bool need_map); static dev_t ublk_chr_devt; static const struct class ublk_chr_class = { @@ -663,22 +663,44 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; } +static inline bool ublk_dev_support_zero_copy(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq) { return ubq->flags & UBLK_F_AUTO_BUF_REG; } +static inline bool ublk_dev_support_auto_buf_reg(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_AUTO_BUF_REG; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { return ubq->flags & UBLK_F_USER_COPY; } +static inline bool ublk_dev_support_user_copy(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_USER_COPY; +} + static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq) && !ublk_support_auto_buf_reg(ubq); } +static inline bool ublk_dev_need_map_io(const struct ublk_device *ub) +{ + return !ublk_dev_support_user_copy(ub) && + !ublk_dev_support_zero_copy(ub) && + !ublk_dev_support_auto_buf_reg(ub); +} + static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) { /* @@ -696,6 +718,13 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) ublk_support_auto_buf_reg(ubq); } +static inline bool ublk_dev_need_req_ref(const struct ublk_device *ub) +{ + return ublk_dev_support_user_copy(ub) || + ublk_dev_support_zero_copy(ub) || + ublk_dev_support_auto_buf_reg(ub); +} + static inline void ublk_init_req_ref(const struct ublk_queue *ubq, struct ublk_io *io) { @@ -710,8 +739,11 @@ static inline bool ublk_get_req_ref(struct ublk_io *io) static inline void ublk_put_req_ref(struct ublk_io *io, struct request *req) { - if (refcount_dec_and_test(&io->ref)) - __ublk_complete_rq(req); + if (!refcount_dec_and_test(&io->ref)) + return; + + /* ublk_need_map_io() and ublk_need_req_ref() are mutually exclusive */ + __ublk_complete_rq(req, io, false); } static inline bool ublk_sub_req_ref(struct ublk_io *io) @@ -727,6 +759,11 @@ static inline bool ublk_need_get_data(const struct ublk_queue *ubq) return ubq->flags & UBLK_F_NEED_GET_DATA; } +static inline bool ublk_dev_need_get_data(const struct ublk_device *ub) +{ + return ub->dev_info.flags & UBLK_F_NEED_GET_DATA; +} + /* Called in slow path only, keep it noinline for trace purpose */ static noinline struct ublk_device *ublk_get_device(struct ublk_device *ub) { @@ -763,11 +800,9 @@ static inline int __ublk_queue_cmd_buf_size(int depth) return round_up(depth * sizeof(struct ublksrv_io_desc), PAGE_SIZE); } -static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) +static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub) { - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - - return __ublk_queue_cmd_buf_size(ubq->q_depth); + return __ublk_queue_cmd_buf_size(ub->dev_info.queue_depth); } static int ublk_max_cmd_buf_size(void) @@ -1018,13 +1053,13 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, return rq_bytes; } -static int ublk_unmap_io(const struct ublk_queue *ubq, +static int ublk_unmap_io(bool need_map, const struct request *req, const struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); - if 
(!ublk_need_map_io(ubq)) + if (!need_map) return rq_bytes; if (ublk_need_unmap_req(req)) { @@ -1071,13 +1106,8 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) { struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); struct ublk_io *io = &ubq->ios[req->tag]; - enum req_op op = req_op(req); u32 ublk_op; - if (!ublk_queue_is_zoned(ubq) && - (op_is_zone_mgmt(op) || op == REQ_OP_ZONE_APPEND)) - return BLK_STS_IOERR; - switch (req_op(req)) { case REQ_OP_READ: ublk_op = UBLK_IO_OP_READ; @@ -1116,10 +1146,9 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( } /* todo: handle partial completion */ -static inline void __ublk_complete_rq(struct request *req) +static inline void __ublk_complete_rq(struct request *req, struct ublk_io *io, + bool need_map) { - struct ublk_queue *ubq = req->mq_hctx->driver_data; - struct ublk_io *io = &ubq->ios[req->tag]; unsigned int unmapped_bytes; blk_status_t res = BLK_STS_OK; @@ -1143,7 +1172,7 @@ static inline void __ublk_complete_rq(struct request *req) goto exit; /* for READ request, writing data in iod->addr to rq buffers */ - unmapped_bytes = ublk_unmap_io(ubq, req, io); + unmapped_bytes = ublk_unmap_io(need_map, req, io); /* * Extremely impossible since we got data filled in just before @@ -1188,7 +1217,7 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ - io_uring_cmd_done(cmd, res, 0, issue_flags); + io_uring_cmd_done(cmd, res, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -1389,7 +1418,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, { blk_status_t res; - if (unlikely(ubq->fail_io)) + if (unlikely(READ_ONCE(ubq->fail_io))) return BLK_STS_TARGET; /* With recovery feature enabled, force_abort is set in @@ -1401,7 +1430,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, * Note: force_abort is guaranteed to be seen because it is set * before request queue is unquiesced. 
*/ - if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) + if (ublk_nosrv_should_queue_io(ubq) && + unlikely(READ_ONCE(ubq->force_abort))) return BLK_STS_IOERR; if (check_cancel && unlikely(ubq->canceling)) @@ -1498,9 +1528,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) { int i; - /* All old ioucmds have to be completed */ - ubq->nr_io_ready = 0; - for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; @@ -1549,8 +1576,8 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) /* set to NULL, otherwise new tasks cannot mmap io_cmd_buf */ ub->mm = NULL; - ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; + ub->nr_io_ready = 0; + ub->unprivileged_daemons = false; ub->ublksrv_tgid = -1; } @@ -1594,13 +1621,63 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling) ublk_get_queue(ub, i)->canceling = canceling; } -static int ublk_ch_release(struct inode *inode, struct file *filp) +static bool ublk_check_and_reset_active_ref(struct ublk_device *ub) { - struct ublk_device *ub = filp->private_data; + int i, j; + + if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | + UBLK_F_AUTO_BUF_REG))) + return false; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + for (j = 0; j < ubq->q_depth; j++) { + struct ublk_io *io = &ubq->ios[j]; + unsigned int refs = refcount_read(&io->ref) + + io->task_registered_buffers; + + /* + * UBLK_REFCOUNT_INIT or zero means no active + * reference + */ + if (refs != UBLK_REFCOUNT_INIT && refs != 0) + return true; + + /* reset to zero if the io has no active references */ + refcount_set(&io->ref, 0); + io->task_registered_buffers = 0; + } + } + return false; +} + +static void ublk_ch_release_work_fn(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, exit_work.work); struct gendisk *disk; int i; /* + * For zero-copy and auto buffer register modes, I/O references + * might not be dropped naturally when the daemon is killed, but + * io_uring guarantees that registered bvec kernel buffers are + * finally unregistered when the io_uring context is freed, and the + * active references are then dropped. + * + * Wait until the active references are dropped to avoid a use-after-free. + * + * A registered buffer may be unregistered in io_uring's release handler, + * so wait by rescheduling this work function to avoid the dependency + * between the two file releases. + */ + if (ublk_check_and_reset_active_ref(ub)) { + schedule_delayed_work(&ub->exit_work, 1); + return; + } + + /* * disk isn't attached yet, either device isn't live, or it has * been removed already, so we needn't do anything */ @@ -1644,7 +1721,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * Transition the device to the nosrv state. 
What exactly this * means depends on the recovery flags */ - blk_mq_quiesce_queue(disk->queue); if (ublk_nosrv_should_stop_dev(ub)) { /* * Allow any pending/future I/O to pass through quickly @@ -1652,8 +1728,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) * waits for all pending I/O to complete */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(disk->queue); + WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true); ublk_stop_dev_unlocked(ub); } else { @@ -1663,9 +1738,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->fail_io = true; + WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true); } - blk_mq_unquiesce_queue(disk->queue); } unlock: mutex_unlock(&ub->mutex); @@ -1675,6 +1749,23 @@ unlock: ublk_reset_ch_dev(ub); out: clear_bit(UB_STATE_OPEN, &ub->state); + + /* put the reference grabbed in ublk_ch_release() */ + ublk_put_device(ub); +} + +static int ublk_ch_release(struct inode *inode, struct file *filp) +{ + struct ublk_device *ub = filp->private_data; + + /* + * Grab ublk device reference, so it won't be gone until we are + * really released from work function. + */ + ublk_get_device(ub); + + INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn); + schedule_delayed_work(&ub->exit_work, 0); return 0; } @@ -1709,23 +1800,23 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) __func__, q_id, current->pid, vma->vm_start, phys_off, (unsigned long)sz); - if (sz != ublk_queue_cmd_buf_size(ub, q_id)) + if (sz != ublk_queue_cmd_buf_size(ub)) return -EINVAL; pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT; return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); } -static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, +static void __ublk_fail_req(struct ublk_device *ub, struct ublk_io *io, struct request *req) { WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + if (ublk_nosrv_should_reissue_outstanding(ub)) blk_mq_requeue_request(req, false); else { io->res = -EIO; - __ublk_complete_rq(req); + __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub)); } } @@ -1745,7 +1836,7 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) struct ublk_io *io = &ubq->ios[i]; if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) - __ublk_fail_req(ubq, io, io->req); + __ublk_fail_req(ub, io, io->req); } } @@ -1807,7 +1898,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, spin_unlock(&ubq->cancel_lock); if (!done) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags); + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, issue_flags); } /* @@ -1850,9 +1941,11 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } -static inline bool ublk_queue_ready(struct ublk_queue *ubq) +static inline bool ublk_dev_ready(const struct ublk_device *ub) { - return ubq->nr_io_ready == ubq->q_depth; + u32 total = (u32)ub->dev_info.nr_hw_queues * ub->dev_info.queue_depth; + + return ub->nr_io_ready == total; } static void ublk_cancel_queue(struct ublk_queue *ubq) @@ -1976,18 +2069,14 @@ static void ublk_reset_io_flags(struct ublk_device *ub) } /* device can only be started after all IOs are ready */ -static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) 
+static void ublk_mark_io_ready(struct ublk_device *ub) __must_hold(&ub->mutex) { - ubq->nr_io_ready++; - if (ublk_queue_ready(ubq)) { - ub->nr_queues_ready++; - - if (capable(CAP_SYS_ADMIN)) - ub->nr_privileged_daemon++; - } + if (!ub->unprivileged_daemons && !capable(CAP_SYS_ADMIN)) + ub->unprivileged_daemons = true; - if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { + ub->nr_io_ready++; + if (ublk_dev_ready(ub)) { /* now we are ready for handling ublk io request */ ublk_reset_io_flags(ub); complete_all(&ub->completion); @@ -2058,11 +2147,11 @@ ublk_fill_io_cmd(struct ublk_io *io, struct io_uring_cmd *cmd) } static inline int -ublk_config_io_buf(const struct ublk_queue *ubq, struct ublk_io *io, +ublk_config_io_buf(const struct ublk_device *ub, struct ublk_io *io, struct io_uring_cmd *cmd, unsigned long buf_addr, u16 *buf_idx) { - if (ublk_support_auto_buf_reg(ubq)) + if (ublk_dev_support_auto_buf_reg(ub)) return ublk_handle_auto_buf_reg(io, cmd, buf_idx); io->addr = buf_addr; @@ -2101,18 +2190,18 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, + struct ublk_device *ub, + u16 q_id, u16 tag, struct ublk_io *io, unsigned int index, unsigned int issue_flags) { - struct ublk_device *ub = cmd->file->private_data; struct request *req; int ret; - if (!ublk_support_zero_copy(ubq)) + if (!ublk_dev_support_zero_copy(ub)) return -EINVAL; - req = __ublk_check_and_get_req(ub, ubq, io, 0); + req = __ublk_check_and_get_req(ub, q_id, tag, io, 0); if (!req) return -EINVAL; @@ -2128,7 +2217,8 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, static int ublk_daemon_register_io_buf(struct io_uring_cmd *cmd, - const struct ublk_queue *ubq, struct ublk_io *io, + struct ublk_device *ub, + u16 q_id, u16 tag, struct ublk_io *io, unsigned index, unsigned issue_flags) { unsigned new_registered_buffers; @@ -2141,9 +2231,10 @@ ublk_daemon_register_io_buf(struct io_uring_cmd *cmd, */ new_registered_buffers = io->task_registered_buffers + 1; if (unlikely(new_registered_buffers >= UBLK_REFCOUNT_INIT)) - return ublk_register_io_buf(cmd, ubq, io, index, issue_flags); + return ublk_register_io_buf(cmd, ub, q_id, tag, io, index, + issue_flags); - if (!ublk_support_zero_copy(ubq) || !ublk_rq_has_data(req)) + if (!ublk_dev_support_zero_copy(ub) || !ublk_rq_has_data(req)) return -EINVAL; ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index, @@ -2165,14 +2256,14 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, return io_buffer_unregister_bvec(cmd, index, issue_flags); } -static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr) +static int ublk_check_fetch_buf(const struct ublk_device *ub, __u64 buf_addr) { - if (ublk_need_map_io(ubq)) { + if (ublk_dev_need_map_io(ub)) { /* * FETCH_RQ has to provide IO buffer if NEED GET * DATA is not enabled */ - if (!buf_addr && !ublk_need_get_data(ubq)) + if (!buf_addr && !ublk_dev_need_get_data(ub)) return -EINVAL; } else if (buf_addr) { /* User copy requires addr to be unset */ @@ -2181,10 +2272,9 @@ static int ublk_check_fetch_buf(const struct ublk_queue *ubq, __u64 buf_addr) return 0; } -static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, +static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_device *ub, struct ublk_io *io, __u64 buf_addr) { - struct ublk_device *ub = ubq->dev; int ret = 0; /* @@ -2193,8 +2283,8 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, * FETCH, so it is fine even 
for IO_URING_F_NONBLOCK. */ mutex_lock(&ub->mutex); - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ - if (ublk_queue_ready(ubq)) { + /* UBLK_IO_FETCH_REQ is only allowed before dev is setup */ + if (ublk_dev_ready(ub)) { ret = -EBUSY; goto out; } @@ -2208,28 +2298,28 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, buf_addr, NULL); + ret = ublk_config_io_buf(ub, io, cmd, buf_addr, NULL); if (ret) goto out; WRITE_ONCE(io->task, get_task_struct(current)); - ublk_mark_io_ready(ub, ubq); + ublk_mark_io_ready(ub); out: mutex_unlock(&ub->mutex); return ret; } -static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq, +static int ublk_check_commit_and_fetch(const struct ublk_device *ub, struct ublk_io *io, __u64 buf_addr) { struct request *req = io->req; - if (ublk_need_map_io(ubq)) { + if (ublk_dev_need_map_io(ub)) { /* * COMMIT_AND_FETCH_REQ has to provide IO buffer if * NEED GET DATA is not enabled or it is Read IO. */ - if (!buf_addr && (!ublk_need_get_data(ubq) || + if (!buf_addr && (!ublk_dev_need_get_data(ub) || req_op(req) == REQ_OP_READ)) return -EINVAL; } else if (req_op(req) != REQ_OP_ZONE_APPEND && buf_addr) { @@ -2243,10 +2333,10 @@ static int ublk_check_commit_and_fetch(const struct ublk_queue *ubq, return 0; } -static bool ublk_need_complete_req(const struct ublk_queue *ubq, +static bool ublk_need_complete_req(const struct ublk_device *ub, struct ublk_io *io) { - if (ublk_need_req_ref(ubq)) + if (ublk_dev_need_req_ref(ub)) return ublk_sub_req_ref(io); return true; } @@ -2269,23 +2359,28 @@ static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, return ublk_start_io(ubq, req, io); } -static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, - unsigned int issue_flags, - const struct ublksrv_io_cmd *ub_cmd) +static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, + unsigned int issue_flags) { + /* May point to userspace-mapped memory */ + const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe); u16 buf_idx = UBLK_INVALID_BUF_IDX; struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; struct ublk_io *io; u32 cmd_op = cmd->cmd_op; - unsigned tag = ub_cmd->tag; + u16 q_id = READ_ONCE(ub_src->q_id); + u16 tag = READ_ONCE(ub_src->tag); + s32 result = READ_ONCE(ub_src->result); + u64 addr = READ_ONCE(ub_src->addr); /* unioned with zone_append_lba */ struct request *req; int ret; bool compl; + WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED); + pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", - __func__, cmd->cmd_op, ub_cmd->q_id, tag, - ub_cmd->result); + __func__, cmd->cmd_op, q_id, tag, result); ret = ublk_check_cmd_op(cmd_op); if (ret) @@ -2296,25 +2391,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * so no need to validate the q_id, tag, or task */ if (_IOC_NR(cmd_op) == UBLK_IO_UNREGISTER_IO_BUF) - return ublk_unregister_io_buf(cmd, ub, ub_cmd->addr, - issue_flags); + return ublk_unregister_io_buf(cmd, ub, addr, issue_flags); ret = -EINVAL; - if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) + if (q_id >= ub->dev_info.nr_hw_queues) goto out; - ubq = ublk_get_queue(ub, ub_cmd->q_id); + ubq = ublk_get_queue(ub, q_id); - if (tag >= ubq->q_depth) + if (tag >= ub->dev_info.queue_depth) goto out; io = &ubq->ios[tag]; /* UBLK_IO_FETCH_REQ can be handled on any task, which sets io->task */ if (unlikely(_IOC_NR(cmd_op) == UBLK_IO_FETCH_REQ)) { - 
ret = ublk_check_fetch_buf(ubq, ub_cmd->addr); + ret = ublk_check_fetch_buf(ub, addr); if (ret) goto out; - ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + ret = ublk_fetch(cmd, ub, io, addr); if (ret) goto out; @@ -2328,8 +2422,8 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * so can be handled on any task */ if (_IOC_NR(cmd_op) == UBLK_IO_REGISTER_IO_BUF) - return ublk_register_io_buf(cmd, ubq, io, ub_cmd->addr, - issue_flags); + return ublk_register_io_buf(cmd, ub, q_id, tag, io, + addr, issue_flags); goto out; } @@ -2350,24 +2444,24 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, switch (_IOC_NR(cmd_op)) { case UBLK_IO_REGISTER_IO_BUF: - return ublk_daemon_register_io_buf(cmd, ubq, io, ub_cmd->addr, + return ublk_daemon_register_io_buf(cmd, ub, q_id, tag, io, addr, issue_flags); case UBLK_IO_COMMIT_AND_FETCH_REQ: - ret = ublk_check_commit_and_fetch(ubq, io, ub_cmd->addr); + ret = ublk_check_commit_and_fetch(ub, io, addr); if (ret) goto out; - io->res = ub_cmd->result; + io->res = result; req = ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, &buf_idx); - compl = ublk_need_complete_req(ubq, io); + ret = ublk_config_io_buf(ub, io, cmd, addr, &buf_idx); + compl = ublk_need_complete_req(ub, io); /* can't touch 'ublk_io' any more */ if (buf_idx != UBLK_INVALID_BUF_IDX) io_buffer_unregister_bvec(cmd, buf_idx, issue_flags); if (req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = ub_cmd->zone_append_lba; + req->__sector = addr; if (compl) - __ublk_complete_rq(req); + __ublk_complete_rq(req, io, ublk_dev_need_map_io(ub)); if (ret) goto out; @@ -2379,7 +2473,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * request */ req = ublk_fill_io_cmd(io, cmd); - ret = ublk_config_io_buf(ubq, io, cmd, ub_cmd->addr, NULL); + ret = ublk_config_io_buf(ub, io, cmd, addr, NULL); WARN_ON_ONCE(ret); if (likely(ublk_get_data(ubq, io, req))) { __ublk_prep_compl_io_cmd(io, req); @@ -2399,16 +2493,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - const struct ublk_queue *ubq, struct ublk_io *io, size_t offset) + u16 q_id, u16 tag, struct ublk_io *io, size_t offset) { - unsigned tag = io - ubq->ios; struct request *req; /* * can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ, * which would overwrite it with io->cmd */ - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); if (!req) return NULL; @@ -2430,33 +2523,13 @@ fail_put: return NULL; } -static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd, - unsigned int issue_flags) -{ - /* - * Not necessary for async retry, but let's keep it simple and always - * copy the values to avoid any potential reuse. 
- */ - const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe); - const struct ublksrv_io_cmd ub_cmd = { - .q_id = READ_ONCE(ub_src->q_id), - .tag = READ_ONCE(ub_src->tag), - .result = READ_ONCE(ub_src->result), - .addr = READ_ONCE(ub_src->addr) - }; - - WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED); - - return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd); -} - static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd, unsigned int issue_flags) { int ret = ublk_ch_uring_cmd_local(cmd, issue_flags); if (ret != -EIOCBQUEUED) - io_uring_cmd_done(cmd, ret, 0, issue_flags); + io_uring_cmd_done(cmd, ret, issue_flags); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -2519,17 +2592,14 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, return ERR_PTR(-EINVAL); ubq = ublk_get_queue(ub, q_id); - if (!ubq) - return ERR_PTR(-EINVAL); - - if (!ublk_support_user_copy(ubq)) + if (!ublk_dev_support_user_copy(ub)) return ERR_PTR(-EACCES); - if (tag >= ubq->q_depth) + if (tag >= ub->dev_info.queue_depth) return ERR_PTR(-EINVAL); *io = &ubq->ios[tag]; - req = __ublk_check_and_get_req(ub, ubq, *io, buf_off); + req = __ublk_check_and_get_req(ub, q_id, tag, *io, buf_off); if (!req) return ERR_PTR(-EINVAL); @@ -2592,7 +2662,7 @@ static const struct file_operations ublk_ch_fops = { static void ublk_deinit_queue(struct ublk_device *ub, int q_id) { - int size = ublk_queue_cmd_buf_size(ub, q_id); + int size = ublk_queue_cmd_buf_size(ub); struct ublk_queue *ubq = ublk_get_queue(ub, q_id); int i; @@ -2619,7 +2689,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id) ubq->flags = ub->dev_info.flags; ubq->q_id = q_id; ubq->q_depth = ub->dev_info.queue_depth; - size = ublk_queue_cmd_buf_size(ub, q_id); + size = ublk_queue_cmd_buf_size(ub); ptr = (void *) __get_free_pages(gfp_flags, get_order(size)); if (!ptr) @@ -2880,8 +2950,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, ublk_apply_params(ub); - /* don't probe partitions if any one ubq daemon is un-trusted */ - if (ub->nr_privileged_daemon != ub->nr_queues_ready) + /* don't probe partitions if any daemon task is un-trusted */ + if (ub->unprivileged_daemons) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); ublk_get_device(ub); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e649fa67bac1..f061420dfb10 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -829,9 +829,9 @@ out: } /* We provide getgeo only to please some old bootloader/partitioning tools */ -static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +static int virtblk_getgeo(struct gendisk *disk, struct hd_geometry *geo) { - struct virtio_blk *vblk = bd->bd_disk->private_data; + struct virtio_blk *vblk = disk->private_data; int ret = 0; mutex_lock(&vblk->vdev_mutex); @@ -853,7 +853,7 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) /* some standard values, similar to sd */ geo->heads = 1 << 6; geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + geo->cylinders = get_capacity(disk) >> 11; } out: mutex_unlock(&vblk->vdev_mutex); @@ -1682,7 +1682,7 @@ static int __init virtio_blk_init(void) { int error; - virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + virtblk_wq = alloc_workqueue("virtio-blk", WQ_PERCPU, 0); if (!virtblk_wq) return -ENOMEM; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5babe575c288..04fc6b552c04 100644 --- a/drivers/block/xen-blkfront.c +++ 
b/drivers/block/xen-blkfront.c @@ -493,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg) schedule_work(&rinfo->work); } -static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg) { /* We don't have real geometry info, but let's at least return values consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); + sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; hg->heads = 0xff; diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c index 553b1a713ab9..a423228e201b 100644 --- a/drivers/block/zloop.c +++ b/drivers/block/zloop.c @@ -700,6 +700,8 @@ static void zloop_free_disk(struct gendisk *disk) struct zloop_device *zlo = disk->private_data; unsigned int i; + blk_mq_free_tag_set(&zlo->tag_set); + for (i = 0; i < zlo->nr_zones; i++) { struct zloop_zone *zone = &zlo->zones[i]; @@ -1080,7 +1082,6 @@ static int zloop_ctl_remove(struct zloop_options *opts) del_gendisk(zlo->disk); put_disk(zlo->disk); - blk_mq_free_tag_set(&zlo->tag_set); pr_info("Removed device %d\n", opts->id); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8acad3cc6e6e..a43074657531 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1085,7 +1085,7 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page, work.entry = entry; INIT_WORK_ONSTACK(&work.work, zram_sync_read); - queue_work(system_unbound_wq, &work.work); + queue_work(system_dfl_wq, &work.work); flush_work(&work.work); destroy_work_on_stack(&work.work); @@ -1225,18 +1225,6 @@ static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg) zram->comp_algs[prio] = alg; } -static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, - char *buf, ssize_t at) -{ - ssize_t sz; - - down_read(&zram->init_lock); - sz = zcomp_available_show(zram->comp_algs[prio], buf, at); - up_read(&zram->init_lock); - - return sz; -} - static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf) { char *compressor; @@ -1387,8 +1375,12 @@ static ssize_t comp_algorithm_show(struct device *dev, char *buf) { struct zram *zram = dev_to_zram(dev); + ssize_t sz; - return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf, 0); + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf, 0); + up_read(&zram->init_lock); + return sz; } static ssize_t comp_algorithm_store(struct device *dev, @@ -1412,14 +1404,15 @@ static ssize_t recomp_algorithm_show(struct device *dev, ssize_t sz = 0; u32 prio; + down_read(&zram->init_lock); for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) { if (!zram->comp_algs[prio]) continue; sz += sysfs_emit_at(buf, sz, "#%d: ", prio); - sz += __comp_algorithm_show(zram, prio, buf, sz); + sz += zcomp_available_show(zram->comp_algs[prio], buf, sz); } - + up_read(&zram->init_lock); return sz; } @@ -1795,6 +1788,7 @@ static int write_same_filled_page(struct zram *zram, unsigned long fill, u32 index) { zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_flag(zram, index, ZRAM_SAME); zram_set_handle(zram, index, fill); zram_slot_unlock(zram, index); @@ -1832,6 +1826,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page, kunmap_local(src); zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_flag(zram, index, ZRAM_HUGE); zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, PAGE_SIZE); @@ -1855,11 
+1850,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) unsigned long element; bool same_filled; - /* First, free memory allocated to this slot (if any) */ - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_slot_unlock(zram, index); - mem = kmap_local_page(page); same_filled = page_same_filled(mem, &element); kunmap_local(mem); @@ -1901,6 +1891,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) zcomp_stream_put(zstrm); zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, comp_len); zram_slot_unlock(zram, index);
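The hunks above move ->getgeo() from taking a struct block_device * to taking the struct gendisk * directly (sunvdc, swim, virtio_blk, xen-blkfront). A minimal sketch of a ->getgeo() implementation under that prototype follows; the mydrv_* names, the private struct and the fake 64x32 geometry are illustrative assumptions, not code from any of the patched drivers.

#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>

/* hypothetical driver state; not taken from any driver patched above */
struct mydrv_device {
	sector_t capacity;
};

static int mydrv_getgeo(struct gendisk *disk, struct hd_geometry *geo)
{
	/* private data is reached through the gendisk, no block_device needed */
	struct mydrv_device *dev = disk->private_data;

	/* fake geometry for old partitioning tools, in the style of the drivers above */
	geo->heads = 64;
	geo->sectors = 32;
	geo->cylinders = dev->capacity >> 11;	/* capacity / (heads * sectors) */
	geo->start = 0;
	return 0;
}

static const struct block_device_operations mydrv_fops = {
	.owner	= THIS_MODULE,
	.getgeo	= mydrv_getgeo,
};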
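sunvdc and virtio_blk above now pass WQ_PERCPU to alloc_workqueue() instead of 0, and zram's synchronous read path queues onto system_dfl_wq, making the per-CPU versus unbound choice explicit. A sketch of a driver-private workqueue allocated in that style, with a hypothetical mydrv module and abbreviated error handling:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *mydrv_wq;	/* hypothetical module workqueue */

static int __init mydrv_init(void)
{
	/*
	 * Ask for per-CPU execution explicitly; an unbound user would pass
	 * WQ_UNBOUND or queue to system_dfl_wq instead.
	 */
	mydrv_wq = alloc_workqueue("mydrv", WQ_PERCPU, 0);
	if (!mydrv_wq)
		return -ENOMEM;
	return 0;
}

static void __exit mydrv_exit(void)
{
	destroy_workqueue(mydrv_wq);
}

module_init(mydrv_init);
module_exit(mydrv_exit);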
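In the ublk changes above, ublk_put_req_ref() completes the request only when refcount_dec_and_test() drops the last per-I/O reference, so the final holder performs the completion. A generic sketch of that refcount-gated completion pattern, using hypothetical my_io names rather than the ublk structures:

#include <linux/refcount.h>

struct my_io {
	refcount_t ref;
	int res;			/* completion status */
};

static void my_io_complete(struct my_io *io)
{
	/* end the block request / signal the waiter here */
}

/* taken by every concurrent user of the I/O (daemon, buffer borrower, ...) */
static bool my_io_get(struct my_io *io)
{
	return refcount_inc_not_zero(&io->ref);
}

/* only the holder of the final reference completes; everyone else just drops */
static void my_io_put(struct my_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return;
	my_io_complete(io);
}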
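ublk_ch_release() above now just grabs a device reference and schedules exit_work; the work function re-arms itself with schedule_delayed_work(..., 1) while buffer references remain and only then tears the device down. A minimal sketch of that poll-and-re-arm teardown pattern, with hypothetical my_dev names and a stubbed-out busy check:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	struct delayed_work exit_work;
	/* ... device state ... */
};

/* assumed predicate: true while buffers or I/O references are still held */
static bool my_dev_busy(struct my_dev *d)
{
	return false;
}

static void my_dev_release_work(struct work_struct *work)
{
	struct my_dev *d = container_of(work, struct my_dev, exit_work.work);

	if (my_dev_busy(d)) {
		/* check again on the next tick instead of blocking release() */
		schedule_delayed_work(&d->exit_work, 1);
		return;
	}
	kfree(d);	/* all references gone, safe to tear down */
}

/* called from the file release path, which must not wait for the users */
static void my_dev_schedule_release(struct my_dev *d)
{
	INIT_DELAYED_WORK(&d->exit_work, my_dev_release_work);
	schedule_delayed_work(&d->exit_work, 0);
}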
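With the request queue no longer quiesced around the nosrv transition, the fail_io and force_abort flags above are set with WRITE_ONCE() and tested in the submission path with READ_ONCE(). A minimal sketch of that lockless flag handshake, with hypothetical names (the real code additionally aborts or requeues the affected requests):

#include <linux/compiler.h>
#include <linux/types.h>

struct my_queue {
	bool fail_io;	/* flipped by the control path, read in the I/O hot path */
};

/* control path: no lock shared with the hot path is held here */
static void my_queue_set_fail(struct my_queue *q)
{
	WRITE_ONCE(q->fail_io, true);
}

/* hot path: READ_ONCE() stops the compiler from caching or tearing the load */
static bool my_queue_should_fail(const struct my_queue *q)
{
	return READ_ONCE(q->fail_io);
}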