From 5ee0524ba137fe928a88b440d014e3c8451fb32c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:31 -0800 Subject: block: Add 'lock' as third argument to blk_alloc_queue_node() This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 2d1a7bbe0634..e873a24bf82d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -810,7 +810,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl) struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); + return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); } EXPORT_SYMBOL(blk_alloc_queue); @@ -888,7 +888,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock) { struct request_queue *q; @@ -1030,7 +1031,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); if (!q) return NULL; -- cgit v1.2.3 From 498f6650aec864e331cae7575fec5f07781d0bf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:32 -0800 Subject: block: Fix a race between the cgroup code and request queue initialization Initialize the request queue lock earlier such that the following race can no longer occur: blk_init_queue_node() blkcg_print_blkgs() blk_alloc_queue_node (1) q->queue_lock = &q->__queue_lock (2) blkcg_init_queue(q) (3) spin_lock_irq(blkg->q->queue_lock) (4) q->queue_lock = lock (5) spin_unlock_irq(blkg->q->queue_lock) (6) (1) allocate an uninitialized queue; (2) initialize queue_lock to its default internal lock; (3) initialize blkcg part of request queue, which will create blkg and then insert it to blkg_list; (4) traverse blkg_list and find the created blkg, and then take its queue lock, here it is the default *internal lock*; (5) *race window*, now queue_lock is overridden with *driver specified lock*; (6) now unlock *driver specified lock*, not the locked *internal lock*, unlock balance breaks. The changes in this patch are as follows: - Move the .queue_lock initialization from blk_init_queue_node() into blk_alloc_queue_node(). - Only override the .queue_lock pointer for legacy queues because it is not useful for blk-mq queues to override this pointer. - For all all block drivers that initialize .queue_lock explicitly, change the blk_alloc_queue() call in the driver into a blk_alloc_queue_node() call and remove the explicit .queue_lock initialization. Additionally, initialize the spin lock that will be used as queue lock earlier if necessary. Reported-by: Joseph Qi Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- block/blk-core.c | 24 ++++++++++++++++-------- drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/umem.c | 7 +++---- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index e873a24bf82d..41c74b37be85 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } +/** + * blk_alloc_queue_node - allocate a request queue + * @gfp_mask: memory allocation flags + * @node_id: NUMA node to allocate memory from + * @lock: For legacy queues, pointer to a spinlock that will be used to e.g. + * serialize calls to the legacy .request_fn() callback. Ignored for + * blk-mq request queues. + * + * Note: pass the queue lock as the third argument to this function instead of + * setting the queue lock pointer explicitly to avoid triggering a sporadic + * crash in the blkcg code. This function namely calls blkcg_init_queue() and + * the queue lock pointer must be set before blkcg_init_queue() is called. + */ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, spinlock_t *lock) { @@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - /* - * By default initialize queue_lock to internal lock and driver can - * override it later if need be. - */ - q->queue_lock = &q->__queue_lock; + if (!q->mq_ops) + q->queue_lock = lock ? : &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid @@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); if (!q) return NULL; q->request_fn = rfn; - if (lock) - q->queue_lock = lock; if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return NULL; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0a0394aa1b9c..185f1ef00a7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); if (!q) goto out_no_q; device->rq_queue = q; @@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8077123678ad..5c7fb8cc4149 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->Active = -1; /* no page is active */ card->bio = NULL; card->biotail = &card->bio; + spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, + &card->lock); if (!card->queue) goto failed_alloc; blk_queue_make_request(card->queue, mm_make_request); - card->queue->queue_lock = &card->lock; card->queue->queuedata = card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &card->dev->dev, "Window size %d bytes, IRQ %d\n", data, dev->irq); - spin_lock_init(&card->lock); - pci_set_drvdata(dev, card); if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ -- cgit v1.2.3 From a063057d7c731cffa7d10740e8ebc2970df8dbb3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:33 -0800 Subject: block: Fix a race between request queue removal and the block cgroup controller Avoid that the following race can occur: blk_cleanup_queue() blkcg_print_blkgs() spin_lock_irq(lock) (1) spin_lock_irq(blkg->q->queue_lock) (2,5) q->queue_lock = &q->__queue_lock (3) spin_unlock_irq(lock) (4) spin_unlock_irq(blkg->q->queue_lock) (6) (1) take driver lock; (2) busy loop for driver lock; (3) override driver lock with internal lock; (4) unlock driver lock; (5) can take driver lock now; (6) but unlock internal lock. This change is safe because only the SCSI core and the NVME core keep a reference on a request queue after having called blk_cleanup_queue(). Neither driver accesses any of the removed data structures between its blk_cleanup_queue() and blk_put_queue() calls. Reported-by: Joseph Qi Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Jan Kara Signed-off-by: Jens Axboe --- block/blk-core.c | 31 +++++++++++++++++++++++++++++++ block/blk-sysfs.c | 7 ------- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 41c74b37be85..6febc69a58aa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -719,6 +719,37 @@ void blk_cleanup_queue(struct request_queue *q) del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); + /* + * I/O scheduler exit is only safe after the sysfs scheduler attribute + * has been removed. + */ + WARN_ON_ONCE(q->kobj.state_in_sysfs); + + /* + * Since the I/O scheduler exit code may access cgroup information, + * perform I/O scheduler exit before disassociating from the block + * cgroup controller. + */ + if (q->elevator) { + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + q->elevator = NULL; + } + + /* + * Remove all references to @q from the block cgroup controller before + * restoring @q->queue_lock to avoid that restoring this pointer causes + * e.g. blkcg_print_blkgs() to crash. + */ + blkcg_exit_queue(q); + + /* + * Since the cgroup code may dereference the @q->backing_dev_info + * pointer, only decrease its reference count after having removed the + * association with the block cgroup controller. + */ + bdi_put(q->backing_dev_info); + if (q->mq_ops) blk_mq_free_queue(q); percpu_ref_exit(&q->q_usage_counter); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cbea895a5547..fd71a00c9462 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -798,13 +798,6 @@ static void __blk_release_queue(struct work_struct *work) if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) blk_stat_remove_callback(q, q->poll_cb); blk_stat_free_callback(q->poll_cb); - bdi_put(q->backing_dev_info); - blkcg_exit_queue(q); - - if (q->elevator) { - ioc_clear_queue(q); - elevator_exit(q, q->elevator); - } blk_free_queue_stats(q->stats); -- cgit v1.2.3 From f78bac2c8e69144781e271d9771bae8dbb4e7098 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:03 -0800 Subject: block: Use the queue_flag_*() functions instead of open-coding these Except for changing the atomic queue flag manipulations that are protected by the queue lock into non-atomic manipulations, this patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 2 +- block/blk-settings.c | 4 ++-- block/blk-stat.c | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 6febc69a58aa..241b73088617 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -994,7 +994,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, * registered by blk_register_queue(). */ q->bypass_depth = 1; - __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); init_waitqueue_head(&q->mq_freeze_wq); diff --git a/block/blk-mq.c b/block/blk-mq.c index 75336848f7a7..e70cc7d48f58 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2678,7 +2678,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; if (!(set->flags & BLK_MQ_F_SG_MERGE)) - q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE; + queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); q->sg_reserved_size = INT_MAX; diff --git a/block/blk-settings.c b/block/blk-settings.c index 48ebe6be07b7..7f719da0eadd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -861,9 +861,9 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { spin_lock_irq(q->queue_lock); if (queueable) - clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); else - set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); diff --git a/block/blk-stat.c b/block/blk-stat.c index 28003bf9941c..b664aa6df725 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_add_tail_rcu(&cb->list, &q->stats->callbacks); - set_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } EXPORT_SYMBOL_GPL(blk_stat_add_callback); @@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_del_rcu(&cb->list); if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) - clear_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); del_timer_sync(&cb->timer); @@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q) { spin_lock(&q->stats->lock); q->stats->enable_accounting = true; - set_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } -- cgit v1.2.3 From 8814ce8a0f680599a837af18aefdec774e5c7b97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:04 -0800 Subject: block: Introduce blk_queue_flag_{set,clear,test_and_{set,clear}}() Introduce functions that modify the queue flags and that protect these modifications with the request queue lock. Except for moving one wake_up_all() call from inside to outside a critical section, this patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 91 +++++++++++++++++++++++++++++++++++++++++--------- block/blk-mq.c | 12 ++----- block/blk-settings.c | 6 ++-- block/blk-sysfs.c | 22 ++++-------- block/blk-timeout.c | 6 ++-- include/linux/blkdev.h | 5 +++ 6 files changed, 93 insertions(+), 49 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 241b73088617..74c6283f4509 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep; */ static struct workqueue_struct *kblockd_workqueue; +/** + * blk_queue_flag_set - atomically set a queue flag + * @flag: flag to be set + * @q: request queue + */ +void blk_queue_flag_set(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + queue_flag_set(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_queue_flag_set); + +/** + * blk_queue_flag_clear - atomically clear a queue flag + * @flag: flag to be cleared + * @q: request queue + */ +void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + queue_flag_clear(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_queue_flag_clear); + +/** + * blk_queue_flag_test_and_set - atomically test and set a queue flag + * @flag: flag to be set + * @q: request queue + * + * Returns the previous value of @flag - 0 if the flag was not set and 1 if + * the flag was already set. + */ +bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + bool res; + + spin_lock_irqsave(q->queue_lock, flags); + res = queue_flag_test_and_set(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); + + return res; +} +EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); + +/** + * blk_queue_flag_test_and_clear - atomically test and clear a queue flag + * @flag: flag to be cleared + * @q: request queue + * + * Returns the previous value of @flag - 0 if the flag was not set and 1 if + * the flag was set. + */ +bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + bool res; + + spin_lock_irqsave(q->queue_lock, flags); + res = queue_flag_test_and_clear(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); + + return res; +} +EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); + static void blk_clear_congested(struct request_list *rl, int sync) { #ifdef CONFIG_CGROUP_WRITEBACK @@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue); */ int blk_set_preempt_only(struct request_queue *q) { - unsigned long flags; - int res; - - spin_lock_irqsave(q->queue_lock, flags); - res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); - spin_unlock_irqrestore(q->queue_lock, flags); - - return res; + return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); } EXPORT_SYMBOL_GPL(blk_set_preempt_only); void blk_clear_preempt_only(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); + blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); wake_up_all(&q->mq_freeze_wq); - spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL_GPL(blk_clear_preempt_only); @@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); void blk_set_queue_dying(struct request_queue *q) { - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_DYING, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_set(QUEUE_FLAG_DYING, q); /* * When queue DYING flag is set, we need to block new req diff --git a/block/blk-mq.c b/block/blk-mq.c index e70cc7d48f58..a86899022683 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); */ void blk_mq_quiesce_queue_nowait(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_set(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q); } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); @@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); */ void blk_mq_unquiesce_queue(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q); /* dispatch requests which are inserted during quiescing */ blk_mq_run_hw_queues(q, true); diff --git a/block/blk-settings.c b/block/blk-settings.c index 7f719da0eadd..d1de71124656 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -859,12 +859,10 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment); void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { - spin_lock_irq(q->queue_lock); if (queueable) - queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); + blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); else - queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index fd71a00c9462..d00d1b0ec109 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -276,12 +276,10 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \ if (neg) \ val = !val; \ \ - spin_lock_irq(q->queue_lock); \ if (val) \ - queue_flag_set(QUEUE_FLAG_##flag, q); \ + blk_queue_flag_set(QUEUE_FLAG_##flag, q); \ else \ - queue_flag_clear(QUEUE_FLAG_##flag, q); \ - spin_unlock_irq(q->queue_lock); \ + blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \ return ret; \ } @@ -414,12 +412,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, if (ret < 0) return ret; - spin_lock_irq(q->queue_lock); if (poll_on) - queue_flag_set(QUEUE_FLAG_POLL, q); + blk_queue_flag_set(QUEUE_FLAG_POLL, q); else - queue_flag_clear(QUEUE_FLAG_POLL, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_POLL, q); return ret; } @@ -487,12 +483,10 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page, if (set == -1) return -EINVAL; - spin_lock_irq(q->queue_lock); if (set) - queue_flag_set(QUEUE_FLAG_WC, q); + blk_queue_flag_set(QUEUE_FLAG_WC, q); else - queue_flag_clear(QUEUE_FLAG_WC, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_WC, q); return count; } @@ -946,9 +940,7 @@ void blk_unregister_queue(struct gendisk *disk) */ mutex_lock(&q->sysfs_lock); - spin_lock_irq(q->queue_lock); - queue_flag_clear(QUEUE_FLAG_REGISTERED, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); /* * Remove the sysfs attributes before unregistering the queue data diff --git a/block/blk-timeout.c b/block/blk-timeout.c index a05e3676d24a..34a55250f08a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -57,12 +57,10 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, char *p = (char *) buf; val = simple_strtoul(p, &p, 10); - spin_lock_irq(q->queue_lock); if (val) - queue_flag_set(QUEUE_FLAG_FAIL_IO, q); + blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q); else - queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); } return count; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c351aaec3ca7..f84b3c7887b1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -707,6 +707,11 @@ struct request_queue { (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) +void blk_queue_flag_set(unsigned int flag, struct request_queue *q); +void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); + /* * @q->queue_lock is set while a queue is being initialized. Since we know * that no other threads access the queue object before @q->queue_lock has -- cgit v1.2.3 From 52c5e62d4c4beecddc6e1b8045ce1d695fca1ba7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Mar 2018 16:56:53 +0100 Subject: block: bio_check_eod() needs to consider partitions bio_check_eod() should check partition size not the whole disk if bio->bi_partno is non-zero. Do this by moving the call to bio_check_eod() into blk_partition_remap(). Based on an earlier patch from Jiufei Xue. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reported-by: Jiufei Xue Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 93 ++++++++++++++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 53 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 74c6283f4509..5e88c579e896 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2122,7 +2122,7 @@ out_unlock: return BLK_QC_T_NONE; } -static void handle_bad_sector(struct bio *bio) +static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; @@ -2130,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio) printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", bio_devname(bio, b), bio->bi_opf, (unsigned long long)bio_end_sector(bio), - (long long)get_capacity(bio->bi_disk)); + (long long)maxsector); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -2191,68 +2191,59 @@ static noinline int should_fail_bio(struct bio *bio) } ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO); +/* + * Check whether this bio extends beyond the end of the device or partition. + * This may well happen - the kernel calls bread() without checking the size of + * the device, e.g., when mounting a file system. + */ +static inline int bio_check_eod(struct bio *bio, sector_t maxsector) +{ + unsigned int nr_sectors = bio_sectors(bio); + + if (nr_sectors && maxsector && + (nr_sectors > maxsector || + bio->bi_iter.bi_sector > maxsector - nr_sectors)) { + handle_bad_sector(bio, maxsector); + return -EIO; + } + return 0; +} + /* * Remap block n of partition p to block n+start(p) of the disk. */ static inline int blk_partition_remap(struct bio *bio) { struct hd_struct *p; - int ret = 0; + int ret = -EIO; rcu_read_lock(); p = __disk_get_part(bio->bi_disk, bio->bi_partno); - if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) || - bio_check_ro(bio, p))) { - ret = -EIO; + if (unlikely(!p)) + goto out; + if (unlikely(should_fail_request(p, bio->bi_iter.bi_size))) + goto out; + if (unlikely(bio_check_ro(bio, p))) goto out; - } /* * Zone reset does not include bi_size so bio_sectors() is always 0. * Include a test for the reset op code and perform the remap if needed. */ - if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET) - goto out; - - bio->bi_iter.bi_sector += p->start_sect; - bio->bi_partno = 0; - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); - + if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { + if (bio_check_eod(bio, part_nr_sects_read(p))) + goto out; + bio->bi_iter.bi_sector += p->start_sect; + bio->bi_partno = 0; + trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector - p->start_sect); + } + ret = 0; out: rcu_read_unlock(); return ret; } -/* - * Check whether this bio extends beyond the end of the device. - */ -static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) -{ - sector_t maxsector; - - if (!nr_sectors) - return 0; - - /* Test device or partition size, when known. */ - maxsector = get_capacity(bio->bi_disk); - if (maxsector) { - sector_t sector = bio->bi_iter.bi_sector; - - if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { - /* - * This may well happen - the kernel calls bread() - * without checking the size of the device, e.g., when - * mounting a device. - */ - handle_bad_sector(bio); - return 1; - } - } - - return 0; -} - static noinline_for_stack bool generic_make_request_checks(struct bio *bio) { @@ -2263,9 +2254,6 @@ generic_make_request_checks(struct bio *bio) might_sleep(); - if (bio_check_eod(bio, nr_sectors)) - goto end_io; - q = bio->bi_disk->queue; if (unlikely(!q)) { printk(KERN_ERR @@ -2285,17 +2273,16 @@ generic_make_request_checks(struct bio *bio) if (should_fail_bio(bio)) goto end_io; - if (!bio->bi_partno) { - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + if (bio->bi_partno) { + if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (blk_partition_remap(bio)) + if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + goto end_io; + if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; } - if (bio_check_eod(bio, nr_sectors)) - goto end_io; - /* * Filter flush bio's early so that make_request based * drivers without flush support don't have to worry -- cgit v1.2.3 From 818e0fa293ca836eba515615c64680ea916fd7cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 19 Mar 2018 11:46:13 -0700 Subject: block: Change a rcu_read_{lock,unlock}_sched() pair into rcu_read_{lock,unlock}() scsi_device_quiesce() uses synchronize_rcu() to guarantee that the effect of blk_set_preempt_only() will be visible for percpu_ref_tryget() calls that occur after the queue unfreeze by using the approach explained in https://lwn.net/Articles/573497/. The rcu read lock and unlock calls in blk_queue_enter() form a pair with the synchronize_rcu() call in scsi_device_quiesce(). Both scsi_device_quiesce() and blk_queue_enter() must either use regular RCU or RCU-sched. Since neither the RCU-protected code in blk_queue_enter() nor blk_queue_usage_counter_release() sleeps, regular RCU protection is sufficient. Note: scsi_device_quiesce() does not have to be modified since it already uses synchronize_rcu(). Reported-by: Tejun Heo Fixes: 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Cc: Tejun Heo Cc: Hannes Reinecke Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Cc: Oleksandr Natalenko Cc: Martin Steigerwald Cc: stable@vger.kernel.org # v4.15 Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 5e88c579e896..a0f675f84f86 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -917,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) bool success = false; int ret; - rcu_read_lock_sched(); + rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* * The code that sets the PREEMPT_ONLY flag is @@ -930,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) percpu_ref_put(&q->q_usage_counter); } } - rcu_read_unlock_sched(); + rcu_read_unlock(); if (success) return 0; -- cgit v1.2.3