From bc9fcbf9cb8ec76d340da16fbf48a9a316e14c52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:31:18 +0200 Subject: block: move blk_throtl prototypes to block/blk.h blk_throtl interface is block internal and there's no reason to have them in linux/blkdev.h. Move them to block/blk.h. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a19f58c6fc3a..f3f495ea4eeb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -10,6 +10,7 @@ #include #include #include "blk-cgroup.h" +#include "blk.h" /* Max dispatch from a group in 1 round */ static int throtl_grp_quantum = 8; -- cgit v1.2.3 From 315fceee81155ef2aeed9316ca72aeea9347db5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:31:25 +0200 Subject: block: drop unnecessary blk_get/put_queue() in scsi_cmd_ioctl() and blk_get_tg() blk_get/put_queue() in scsi_cmd_ioctl() and throtl_get_tg() are completely bogus. The caller must have a reference to the queue on entry and taking an extra reference doesn't change anything. For scsi_cmd_ioctl(), the only effect is that it ends up checking QUEUE_FLAG_DEAD on entry; however, this is bogus as queue can die right after blk_get_queue(). Dead queue should be and is handled in request issue path (it's somewhat broken now but that's a separate problem and doesn't affect this one much). throtl_get_tg() incorrectly assumes that q is rcu freed. Also, it doesn't check return value of blk_get_queue(). If the queue is already dead, it ends up doing an extra put. Drop them. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 8 +------- block/scsi_ioctl.c | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f3f495ea4eeb..ecba5fcef201 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -324,12 +324,8 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc - * - * Take the request queue reference to make sure queue does not - * go away once we return from allocation. + * we need to drop rcu lock and queue_lock before we call alloc. */ - blk_get_queue(q); rcu_read_unlock(); spin_unlock_irq(q->queue_lock); @@ -339,13 +335,11 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) * dead */ if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - blk_put_queue(q); if (tg) kfree(tg); return ERR_PTR(-ENODEV); } - blk_put_queue(q); /* Group allocated and queue is still alive. take the lock */ spin_lock_irq(q->queue_lock); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4f4230b79bb6..fbdf0d802ec4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod { int err; - if (!q || blk_get_queue(q)) + if (!q) return -ENXIO; switch (cmd) { @@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod err = -ENOTTY; } - blk_put_queue(q); return err; } EXPORT_SYMBOL(scsi_cmd_ioctl); -- cgit v1.2.3 From bc16a4f933bc5ed50826b20561e4c3515061998b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:33:01 +0200 Subject: block: reorganize throtl_get_tg() and blk_throtl_bio() blk_throtl_bio() and throtl_get_tg() have rather unusual interface. * throtl_get_tg() returns pointer to a valid tg or ERR_PTR(-ENODEV), and drops queue_lock in the latter case. Different locking context depending on return value is error-prone and DEAD state is scheduled to be protected by queue_lock anyway. Move DEAD check inside queue_lock and return valid tg or NULL. * blk_throtl_bio() indicates return status both with its return value and in/out param **@bio. The former is used to indicate whether queue is found to be dead during throtl processing. The latter whether the bio is throttled. There's no point in returning DEAD check result from blk_throtl_bio(). The queue can die after blk_throtl_bio() is finished but before make_request_fn() grabs queue lock. Make it take *@bio instead and return boolean result indicating whether the request is throttled or not. This patch doesn't cause any visible functional difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++------ block/blk-throttle.c | 49 +++++++++++++++++-------------------------------- block/blk.h | 6 +++--- 3 files changed, 22 insertions(+), 41 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-core.c b/block/blk-core.c index 149149dd7f7b..6c491f2388e9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1515,12 +1515,8 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (blk_throtl_bio(q, &bio)) - goto end_io; - - /* if bio = NULL, bio has been throttled and will be submitted later. */ - if (!bio) - return false; + if (blk_throtl_bio(q, bio)) + return false; /* throttled, will be resubmitted later */ trace_block_bio_queue(q, bio); return true; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ecba5fcef201..900a0c98745b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -303,10 +303,6 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) return tg; } -/* - * This function returns with queue lock unlocked in case of error, like - * request queue is no more - */ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) { struct throtl_grp *tg = NULL, *__tg = NULL; @@ -330,20 +326,16 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) spin_unlock_irq(q->queue_lock); tg = throtl_alloc_tg(td); - /* - * We might have slept in group allocation. Make sure queue is not - * dead - */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - if (tg) - kfree(tg); - - return ERR_PTR(-ENODEV); - } /* Group allocated and queue is still alive. take the lock */ spin_lock_irq(q->queue_lock); + /* Make sure @q is still alive */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { + kfree(tg); + return NULL; + } + /* * Initialize the new group. After sleeping, read the blkcg again. */ @@ -1118,17 +1110,17 @@ static struct blkio_policy_type blkio_policy_throtl = { .plid = BLKIO_POLICY_THROTL, }; -int blk_throtl_bio(struct request_queue *q, struct bio **biop) +bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { struct throtl_data *td = q->td; struct throtl_grp *tg; - struct bio *bio = *biop; bool rw = bio_data_dir(bio), update_disptime = true; struct blkio_cgroup *blkcg; + bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { bio->bi_rw &= ~REQ_THROTTLED; - return 0; + goto out; } /* @@ -1147,7 +1139,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); - return 0; + goto out; } } rcu_read_unlock(); @@ -1156,18 +1148,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) * Either group has not been allocated yet or it is not an unlimited * IO group */ - spin_lock_irq(q->queue_lock); tg = throtl_get_tg(td); - - if (IS_ERR(tg)) { - if (PTR_ERR(tg) == -ENODEV) { - /* - * Queue is gone. No queue lock held here. - */ - return -ENODEV; - } - } + if (unlikely(!tg)) + goto out_unlock; if (tg->nr_queued[rw]) { /* @@ -1195,7 +1179,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) * So keep on trimming slice even if bio is not queued. */ throtl_trim_slice(td, tg, rw); - goto out; + goto out_unlock; } queue_bio: @@ -1207,16 +1191,17 @@ queue_bio: tg->nr_queued[READ], tg->nr_queued[WRITE]); throtl_add_bio_tg(q->td, tg, bio); - *biop = NULL; + throttled = true; if (update_disptime) { tg_update_disptime(td, tg); throtl_schedule_next_dispatch(td); } -out: +out_unlock: spin_unlock_irq(q->queue_lock); - return 0; +out: + return throttled; } int blk_throtl_init(struct request_queue *q) diff --git a/block/blk.h b/block/blk.h index 2b66dc21a493..c018dba4e335 100644 --- a/block/blk.h +++ b/block/blk.h @@ -190,13 +190,13 @@ static inline int blk_do_io_stat(struct request *rq) } #ifdef CONFIG_BLK_DEV_THROTTLING -extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); +extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ -static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) +static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { - return 0; + return false; } static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } -- cgit v1.2.3 From c9a929dde3913780b5c416f4bb9d9ed804f509ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Oct 2011 14:42:16 +0200 Subject: block: fix request_queue lifetime handling by making blk_queue_cleanup() properly shutdown request_queue is refcounted but actually depdends on lifetime management from the queue owner - on blk_cleanup_queue(), block layer expects that there's no request passing through request_queue and no new one will. This is fundamentally broken. The queue owner (e.g. SCSI layer) doesn't have a way to know whether there are other active users before calling blk_cleanup_queue() and other users (e.g. bsg) don't have any guarantee that the queue is and would stay valid while it's holding a reference. With delay added in blk_queue_bio() before queue_lock is grabbed, the following oops can be easily triggered when a device is removed with in-flight IOs. sd 0:0:1:0: [sdb] Stopping disk ata1.01: disabled general protection fault: 0000 [#1] PREEMPT SMP CPU 2 Modules linked in: Pid: 648, comm: test_rawio Not tainted 3.1.0-rc3-work+ #56 Bochs Bochs RIP: 0010:[] [] elv_rqhash_find+0x61/0x100 ... Process test_rawio (pid: 648, threadinfo ffff880019efa000, task ffff880019ef8a80) ... Call Trace: [] elv_merge+0x84/0xe0 [] blk_queue_bio+0xf4/0x400 [] generic_make_request+0xca/0x100 [] submit_bio+0x74/0x100 [] dio_bio_submit+0xbc/0xc0 [] __blockdev_direct_IO+0x92e/0xb40 [] blkdev_direct_IO+0x57/0x60 [] generic_file_aio_read+0x6d5/0x760 [] do_sync_read+0xda/0x120 [] vfs_read+0xc5/0x180 [] sys_pread64+0x9a/0xb0 [] system_call_fastpath+0x16/0x1b This happens because blk_queue_cleanup() destroys the queue and elevator whether IOs are in progress or not and DEAD tests are sprinkled in the request processing path without proper synchronization. Similar problem exists for blk-throtl. On queue cleanup, blk-throtl is shutdown whether it has requests in it or not. Depending on timing, it either oopses or throttled bios are lost putting tasks which are waiting for bio completion into eternal D state. The way it should work is having the usual clear distinction between shutdown and release. Shutdown drains all currently pending requests, marks the queue dead, and performs partial teardown of the now unnecessary part of the queue. Even after shutdown is complete, reference holders are still allowed to issue requests to the queue although they will be immmediately failed. The rest of teardown happens on release. This patch makes the following changes to make blk_queue_cleanup() behave as proper shutdown. * QUEUE_FLAG_DEAD is now set while holding both q->exit_mutex and queue_lock. * Unsynchronized DEAD check in generic_make_request_checks() removed. This couldn't make any meaningful difference as the queue could die after the check. * blk_drain_queue() updated such that it can drain all requests and is now called during cleanup. * blk_throtl updated such that it checks DEAD on grabbing queue_lock, drains all throttled bios during cleanup and free td when queue is released. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 57 +++++++++++++++++++++++++++++++++------------------- block/blk-sysfs.c | 1 + block/blk-throttle.c | 50 +++++++++++++++++++++++++++++++++++++++------ block/blk.h | 6 +++++- block/elevator.c | 2 +- 5 files changed, 87 insertions(+), 29 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-core.c b/block/blk-core.c index 034cbb2024f0..7e1523521c70 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -349,11 +349,13 @@ EXPORT_SYMBOL(blk_put_queue); /** * blk_drain_queue - drain requests from request_queue * @q: queue to drain + * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV * - * Drain ELV_PRIV requests from @q. The caller is responsible for ensuring - * that no new requests which need to be drained are queued. + * Drain requests from @q. If @drain_all is set, all requests are drained. + * If not, only ELVPRIV requests are drained. The caller is responsible + * for ensuring that no new requests which need to be drained are queued. */ -void blk_drain_queue(struct request_queue *q) +void blk_drain_queue(struct request_queue *q, bool drain_all) { while (true) { int nr_rqs; @@ -361,9 +363,15 @@ void blk_drain_queue(struct request_queue *q) spin_lock_irq(q->queue_lock); elv_drain_elevator(q); + if (drain_all) + blk_throtl_drain(q); __blk_run_queue(q); - nr_rqs = q->rq.elvpriv; + + if (drain_all) + nr_rqs = q->rq.count[0] + q->rq.count[1]; + else + nr_rqs = q->rq.elvpriv; spin_unlock_irq(q->queue_lock); @@ -373,30 +381,40 @@ void blk_drain_queue(struct request_queue *q) } } -/* - * Note: If a driver supplied the queue lock, it is disconnected - * by this function. The actual state of the lock doesn't matter - * here as the request_queue isn't accessible after this point - * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. +/** + * blk_cleanup_queue - shutdown a request queue + * @q: request queue to shutdown + * + * Mark @q DEAD, drain all pending requests, destroy and put it. All + * future requests will be failed immediately with -ENODEV. */ void blk_cleanup_queue(struct request_queue *q) { - /* - * We know we have process context here, so we can be a little - * cautious and ensure that pending block actions on this device - * are done before moving on. Going into this function, we should - * not have processes doing IO to this device. - */ - blk_sync_queue(q); + spinlock_t *lock = q->queue_lock; - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + /* mark @q DEAD, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); - mutex_unlock(&q->sysfs_lock); + + spin_lock_irq(lock); + queue_flag_set(QUEUE_FLAG_NOMERGES, q); + queue_flag_set(QUEUE_FLAG_NOXMERGES, q); + queue_flag_set(QUEUE_FLAG_DEAD, q); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; + spin_unlock_irq(lock); + mutex_unlock(&q->sysfs_lock); + + /* drain all requests queued before DEAD marking */ + blk_drain_queue(q, true); + + /* @q won't process any more request, flush async actions */ + del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + blk_sync_queue(q); + + /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); @@ -1509,9 +1527,6 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - goto end_io; - part = bio->bi_bdev->bd_part; if (should_fail_request(part, bio->bi_size) || should_fail_request(&part_to_disk(part)->part0, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a8eff5f8b9c5..e7f9f657f105 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -490,6 +490,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); + blk_throtl_release(q); blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 900a0c98745b..8edb9499b509 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -309,6 +309,10 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) struct blkio_cgroup *blkcg; struct request_queue *q = td->queue; + /* no throttling for dead queue */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); @@ -1001,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td) } } -static void throtl_td_free(struct throtl_data *td) -{ - kfree(td); -} - /* * Blk cgroup controller notification saying that blkio_group object is being * delinked as associated cgroup object is going away. That also means that @@ -1204,6 +1203,41 @@ out: return throttled; } +/** + * blk_throtl_drain - drain throttled bios + * @q: request_queue to drain throttled bios for + * + * Dispatch all currently throttled bios on @q through ->make_request_fn(). + */ +void blk_throtl_drain(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) +{ + struct throtl_data *td = q->td; + struct throtl_rb_root *st = &td->tg_service_tree; + struct throtl_grp *tg; + struct bio_list bl; + struct bio *bio; + + lockdep_is_held(q->queue_lock); + + bio_list_init(&bl); + + while ((tg = throtl_rb_first(st))) { + throtl_dequeue_tg(td, tg); + + while ((bio = bio_list_peek(&tg->bio_lists[READ]))) + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); + while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); + } + spin_unlock_irq(q->queue_lock); + + while ((bio = bio_list_pop(&bl))) + generic_make_request(bio); + + spin_lock_irq(q->queue_lock); +} + int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; @@ -1276,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q) * it. */ throtl_shutdown_wq(q); - throtl_td_free(td); +} + +void blk_throtl_release(struct request_queue *q) +{ + kfree(q->td); } static int __init throtl_init(void) diff --git a/block/blk.h b/block/blk.h index c018dba4e335..3f6551b3c92d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,7 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); -void blk_drain_queue(struct request_queue *q); +void blk_drain_queue(struct request_queue *q, bool drain_all); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, @@ -191,15 +191,19 @@ static inline int blk_do_io_stat(struct request *rq) #ifdef CONFIG_BLK_DEV_THROTTLING extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); +extern void blk_throtl_drain(struct request_queue *q); extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); +extern void blk_throtl_release(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { return false; } +static inline void blk_throtl_drain(struct request_queue *q) { } static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } +static inline void blk_throtl_release(struct request_queue *q) { } #endif /* CONFIG_BLK_DEV_THROTTLING */ #endif /* BLK_INTERNAL_H */ diff --git a/block/elevator.c b/block/elevator.c index 74a277ffed39..66343d6917d0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -626,7 +626,7 @@ void elv_quiesce_start(struct request_queue *q) queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); - blk_drain_queue(q); + blk_drain_queue(q, false); } void elv_quiesce_end(struct request_queue *q) -- cgit v1.2.3 From 334c2b0b8b2ab186fa198413386cba41fffcb4f2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Oct 2011 15:51:48 +0200 Subject: blk-throttle: use queue_is_locked() instead of lockdep_is_held() We can't use the latter if !CONFIG_LOCKDEP. Reported-by: Sedat Dilek Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8edb9499b509..4553245d9317 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1218,7 +1218,7 @@ void blk_throtl_drain(struct request_queue *q) struct bio_list bl; struct bio *bio; - lockdep_is_held(q->queue_lock); + WARN_ON_ONCE(!queue_is_locked(q)); bio_list_init(&bl); -- cgit v1.2.3