summary | refs | log | tree | commit | diff
path: root/block/blk-core.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:43:59 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 21:43:59 +0300
commit: ce8a79d5601aab94c02ed4539c48e8605422ac94 (patch)
tree: 7830a97a475d57284640c8e2d3516521722708b6 /block/blk-core.c
parent: 96f7e448b9f4546ffd0356ffceb2b9586777f316 (diff)
parent: f596da3efaf4130ff61cd029558845808df9bf99 (diff)
download: linux-ce8a79d5601aab94c02ed4539c48e8605422ac94.tar.xz
Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - NVMe pull requests via Christoph:
     - Support some passthrough commands without CAP_SYS_ADMIN (Kanchan Joshi)
     - Refactor PCIe probing and reset (Christoph Hellwig)
     - Various fabrics authentication fixes and improvements (Sagi Grimberg)
     - Avoid fallback to sequential scan due to transient issues (Uday Shankar)
     - Implement support for the DEAC bit in Write Zeroes (Christoph Hellwig)
     - Allow overriding the IEEE OUI and firmware revision in configfs for nvmet (Aleksandr Miloserdov)
     - Force reconnect when number of queue changes in nvmet (Daniel Wagner)
     - Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi Grimberg, Christoph Hellwig, Christophe JAILLET)
     - Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni)
     - Use the common tagset helpers in nvme-pci driver (Christoph Hellwig)
     - Cleanup the nvme-pci removal path (Christoph Hellwig)
     - Use kstrtobool() instead of strtobool (Christophe JAILLET)
     - Allow unprivileged passthrough of Identify Controller (Joel Granados)
     - Support io stats on the mpath device (Sagi Grimberg)
     - Minor nvmet cleanup (Sagi Grimberg)
 - MD pull requests via Song:
     - Code cleanups (Christoph)
     - Various fixes
 - Floppy pull request from Denis:
     - Fix a memory leak in the init error path (Yuan)
 - Series fixing some batch wakeup issues with sbitmap (Gabriel)
 - Removal of the pktcdvd driver that was deprecated more than 5 years ago, and subsequent removal of the devnode callback in struct block_device_operations as no users are now left (Greg)
 - Fix for partition read on an exclusively opened bdev (Jan)
 - Series of elevator API cleanups (Jinlong, Christoph)
 - Series of fixes and cleanups for blk-iocost (Kemeng)
 - Series of fixes and cleanups for blk-throttle (Kemeng)
 - Series adding concurrent support for sync queues in BFQ (Yu)
 - Series bringing drbd a bit closer to the out-of-tree maintained version (Christian, Joel, Lars, Philipp)
 - Misc drbd fixes (Wang)
 - blk-wbt fixes and tweaks for enable/disable (Yu)
 - Fixes for mq-deadline for zoned devices (Damien)
 - Add support for read-only and offline zones for null_blk (Shin'ichiro)
 - Series fixing the delayed holder tracking, as used by DM (Yu, Christoph)
 - Series enabling bio alloc caching for IRQ based IO (Pavel)
 - Series enabling userspace peer-to-peer DMA (Logan)
 - BFQ waker fixes (Khazhismel)
 - Series fixing elevator refcount issues (Christoph, Jinlong)
 - Series cleaning up references around queue destruction (Christoph)
 - Series doing quiesce by tagset, enabling cleanups in drivers (Christoph, Chao)
 - Series untangling the queue kobject and queue references (Christoph)
 - Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye, Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph)

* tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits)
  blktrace: Fix output non-blktrace event when blk_classic option enabled
  block: sed-opal: Don't include <linux/kernel.h>
  sed-opal: allow using IOC_OPAL_SAVE for locking too
  blk-cgroup: Fix typo in comment
  block: remove bio_set_op_attrs
  nvmet: don't open-code NVME_NS_ATTR_RO enumeration
  nvme-pci: use the tagset alloc/free helpers
  nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set
  nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers
  nvme: consolidate setting the tagset flags
  nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
  block: bio_copy_data_iter
  nvme-pci: split out a nvme_pci_ctrl_is_dead helper
  nvme-pci: return early on ctrl state mismatch in nvme_reset_work
  nvme-pci: rename nvme_disable_io_queues
  nvme-pci: cleanup nvme_suspend_queue
  nvme-pci: remove nvme_pci_disable
  nvme-pci: remove nvme_disable_admin_queue
  nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl
  nvme: use nvme_wait_ready in nvme_shutdown_ctrl
  ...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- block/blk-core.c 83
1 files changed, 39 insertions, 44 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 5487912befe8..3866b6c4cd88 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -59,13 +59,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);
-DEFINE_IDA(blk_queue_ida);
+static DEFINE_IDA(blk_queue_ida);
/*
* For queue allocation
*/
-struct kmem_cache *blk_requestq_cachep;
-struct kmem_cache *blk_requestq_srcu_cachep;
+static struct kmem_cache *blk_requestq_cachep;
/*
* Controlling structure to kblockd
@@ -253,19 +252,44 @@ void blk_clear_pm_only(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
+static void blk_free_queue_rcu(struct rcu_head *rcu_head)
+{
+ kmem_cache_free(blk_requestq_cachep,
+ container_of(rcu_head, struct request_queue, rcu_head));
+}
+
+static void blk_free_queue(struct request_queue *q)
+{
+ percpu_ref_exit(&q->q_usage_counter);
+
+ if (q->poll_stat)
+ blk_stat_remove_callback(q, q->poll_cb);
+ blk_stat_free_callback(q->poll_cb);
+
+ blk_free_queue_stats(q->stats);
+ kfree(q->poll_stat);
+
+ if (queue_is_mq(q))
+ blk_mq_release(q);
+
+ ida_free(&blk_queue_ida, q->id);
+ call_rcu(&q->rcu_head, blk_free_queue_rcu);
+}
+
/**
* blk_put_queue - decrement the request_queue refcount
* @q: the request_queue structure to decrement the refcount for
*
- * Decrements the refcount of the request_queue kobject. When this reaches 0
- * we'll have blk_release_queue() called.
+ * Decrements the refcount of the request_queue and frees it when the refcount
+ * reaches 0.
*
- * Context: Any context, but the last reference must not be dropped from
- * atomic context.
+ * Context: Can sleep.
*/
void blk_put_queue(struct request_queue *q)
{
- kobject_put(&q->kobj);
+ might_sleep();
+ if (refcount_dec_and_test(&q->refs))
+ blk_free_queue(q);
}
EXPORT_SYMBOL(blk_put_queue);
@@ -373,26 +397,20 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
+struct request_queue *blk_alloc_queue(int node_id)
{
struct request_queue *q;
- q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
- GFP_KERNEL | __GFP_ZERO, node_id);
+ q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
+ node_id);
if (!q)
return NULL;
- if (alloc_srcu) {
- blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
- if (init_srcu_struct(q->srcu) != 0)
- goto fail_q;
- }
-
q->last_merge = NULL;
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
if (q->id < 0)
- goto fail_srcu;
+ goto fail_q;
q->stats = blk_alloc_queue_stats();
if (!q->stats)
@@ -406,8 +424,7 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
- kobject_init(&q->kobj, &blk_queue_ktype);
-
+ refcount_set(&q->refs, 1);
mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
@@ -434,11 +451,8 @@ fail_stats:
blk_free_queue_stats(q->stats);
fail_id:
ida_free(&blk_queue_ida, q->id);
-fail_srcu:
- if (alloc_srcu)
- cleanup_srcu_struct(q->srcu);
fail_q:
- kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
+ kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
@@ -454,7 +468,7 @@ bool blk_get_queue(struct request_queue *q)
{
if (unlikely(blk_queue_dying(q)))
return false;
- kobject_get(&q->kobj);
+ refcount_inc(&q->refs);
return true;
}
EXPORT_SYMBOL(blk_get_queue);
@@ -945,18 +959,6 @@ unsigned long bdev_start_io_acct(struct block_device *bdev,
EXPORT_SYMBOL(bdev_start_io_acct);
/**
- * bio_start_io_acct_time - start I/O accounting for bio based drivers
- * @bio: bio to start account for
- * @start_time: start time that should be passed back to bio_end_io_acct().
- */
-void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
-{
- bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), start_time);
-}
-EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
-
-/**
* bio_start_io_acct - start I/O accounting for bio based drivers
* @bio: bio to start account for
*
@@ -1183,9 +1185,6 @@ int __init blk_dev_init(void)
sizeof_field(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
sizeof_field(struct bio, bi_opf));
- BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
- __alignof__(struct request_queue)) !=
- sizeof(struct request_queue));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
@@ -1196,10 +1195,6 @@ int __init blk_dev_init(void)
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
- blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
- sizeof(struct request_queue) +
- sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);
-
blk_debugfs_root = debugfs_create_dir("block", NULL);
return 0;