Diffstat (limited to 'block/blk-core.c')
-rw-r--r--  block/blk-core.c | 226
1 file changed, 134 insertions(+), 92 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 15de223c7f93..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+DEFINE_IDA(blk_queue_ida);
+
/*
* For the allocated request tables
*/
@@ -358,7 +360,8 @@ EXPORT_SYMBOL(blk_put_queue);
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
while (true) {
- int nr_rqs;
+ bool drain = false;
+ int i;
spin_lock_irq(q->queue_lock);
@@ -375,14 +378,25 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (!list_empty(&q->queue_head))
__blk_run_queue(q);
- if (drain_all)
- nr_rqs = q->rq.count[0] + q->rq.count[1];
- else
- nr_rqs = q->rq.elvpriv;
+ drain |= q->rq.elvpriv;
+
+ /*
+ * Unfortunately, requests are queued at and tracked from
+ * multiple places and there's no single counter which can
+ * be drained. Check all the queues and counters.
+ */
+ if (drain_all) {
+ drain |= !list_empty(&q->queue_head);
+ for (i = 0; i < 2; i++) {
+ drain |= q->rq.count[i];
+ drain |= q->in_flight[i];
+ drain |= !list_empty(&q->flush_queue[i]);
+ }
+ }
spin_unlock_irq(q->queue_lock);
- if (!nr_rqs)
+ if (!drain)
break;
msleep(10);
}
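
The reworked loop ORs every source of pending work into a single flag and keeps polling until all of them read zero. Below is a minimal userspace sketch of that pattern; the struct fields are invented stand-ins for q->rq.count[], q->in_flight[] and q->rq.elvpriv, and the flush_queue list checks are omitted:

    #include <stdbool.h>
    #include <unistd.h>

    struct queue_state {
        int rq_count[2];   /* allocated requests, sync/async (cf. q->rq.count[]) */
        int in_flight[2];  /* requests issued to the driver (cf. q->in_flight[]) */
        int elvpriv;       /* requests holding elevator-private data */
    };

    static void drain(struct queue_state *q, bool drain_all)
    {
        while (true) {
            bool busy = q->elvpriv != 0;

            if (drain_all)
                for (int i = 0; i < 2; i++)
                    busy |= q->rq_count[i] || q->in_flight[i];

            if (!busy)
                break;
            usleep(10 * 1000);   /* mirrors the msleep(10) back-off above */
        }
    }
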
@@ -469,6 +483,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q)
return NULL;
+ q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+ if (q->id < 0)
+ goto fail_q;
+
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
q->backing_dev_info.state = 0;
@@ -477,20 +495,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->node = node_id;
err = bdi_init(&q->backing_dev_info);
- if (err) {
- kmem_cache_free(blk_requestq_cachep, q);
- return NULL;
- }
+ if (err)
+ goto fail_id;
- if (blk_throtl_init(q)) {
- kmem_cache_free(blk_requestq_cachep, q);
- return NULL;
- }
+ if (blk_throtl_init(q))
+ goto fail_id;
setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
laptop_mode_timer_fn, (unsigned long) q);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
+ INIT_LIST_HEAD(&q->icq_list);
INIT_LIST_HEAD(&q->flush_queue[0]);
INIT_LIST_HEAD(&q->flush_queue[1]);
INIT_LIST_HEAD(&q->flush_data_in_flight);
@@ -508,6 +523,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->queue_lock = &q->__queue_lock;
return q;
+
+fail_id:
+ ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+ kmem_cache_free(blk_requestq_cachep, q);
+ return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);
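
The new q->id handling follows the standard IDA pattern: DEFINE_IDA() declares the allocator, ida_simple_get() returns a small unique integer or a negative errno, and ida_simple_remove() gives the id back on every failure path, exactly as the fail_id: label does. A rough sketch of that pairing; example_setup() is a hypothetical stand-in for whatever can still fail after the id exists:

    #include <linux/idr.h>
    #include <linux/gfp.h>

    static DEFINE_IDA(example_ida);

    static int example_setup(int id);   /* hypothetical follow-up step */

    static int example_alloc(void)
    {
        int id, ret;

        id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);  /* 0, 0: no range limit */
        if (id < 0)
            return id;                  /* -ENOMEM, -ENOSPC, ... */

        ret = example_setup(id);
        if (ret) {
            ida_simple_remove(&example_ida, id);   /* undo, like fail_id: above */
            return ret;
        }
        return id;
    }
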
@@ -605,26 +626,31 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
}
EXPORT_SYMBOL(blk_init_allocated_queue);
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
{
- if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
- kobject_get(&q->kobj);
- return 0;
+ if (likely(!blk_queue_dead(q))) {
+ __blk_get_queue(q);
+ return true;
}
- return 1;
+ return false;
}
EXPORT_SYMBOL(blk_get_queue);
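
Two notes on this hunk. First, blk_queue_dead(), used here and in the later hunks, is simply a named form of the open-coded test on the removed line, i.e. roughly:

    #define blk_queue_dead(q)   test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)

Second, with the return type changed to bool, a caller that needs a queue reference reads as a simple guard (the surrounding driver code here is illustrative only):

    if (!blk_get_queue(q))      /* queue already marked dead, no reference taken */
        return -ENXIO;
    /* ... use q ... */
    blk_put_queue(q);
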
static inline void blk_free_request(struct request_queue *q, struct request *rq)
{
- if (rq->cmd_flags & REQ_ELVPRIV)
+ if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
+ if (rq->elv.icq)
+ put_io_context(rq->elv.icq->ioc);
+ }
+
mempool_free(rq, q->rq.rq_pool);
}
static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+ unsigned int flags, gfp_t gfp_mask)
{
struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
@@ -635,10 +661,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
rq->cmd_flags = flags | REQ_ALLOCED;
- if ((flags & REQ_ELVPRIV) &&
- unlikely(elv_set_request(q, rq, gfp_mask))) {
- mempool_free(rq, q->rq.rq_pool);
- return NULL;
+ if (flags & REQ_ELVPRIV) {
+ rq->elv.icq = icq;
+ if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+ mempool_free(rq, q->rq.rq_pool);
+ return NULL;
+ }
+ /* @rq->elv.icq holds on to io_context until @rq is freed */
+ if (icq)
+ get_io_context(icq->ioc);
}
return rq;
@@ -750,11 +781,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
{
struct request *rq = NULL;
struct request_list *rl = &q->rq;
- struct io_context *ioc = NULL;
+ struct elevator_type *et;
+ struct io_context *ioc;
+ struct io_cq *icq = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
+ bool retried = false;
int may_queue;
+retry:
+ et = q->elevator->type;
+ ioc = current->io_context;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ if (unlikely(blk_queue_dead(q)))
return NULL;
may_queue = elv_may_queue(q, rw_flags);
@@ -763,7 +800,20 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[is_sync]+1 >= q->nr_requests) {
- ioc = current_io_context(GFP_ATOMIC, q->node);
+ /*
+ * We want ioc to record batching state. If it's
+ * not already there, creating a new one requires
+ * dropping queue_lock, which in turn requires
+ * retesting conditions to avoid queue hang.
+ */
+ if (!ioc && !retried) {
+ spin_unlock_irq(q->queue_lock);
+ create_io_context(current, gfp_mask, q->node);
+ spin_lock_irq(q->queue_lock);
+ retried = true;
+ goto retry;
+ }
+
/*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
@@ -799,17 +849,38 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
rl->count[is_sync]++;
rl->starved[is_sync] = 0;
+ /*
+ * Decide whether the new request will be managed by elevator. If
+ * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+ * prevent the current elevator from being destroyed until the new
+ * request is freed. This guarantees icq's won't be destroyed and
+ * makes creating new ones safe.
+ *
+ * Also, lookup icq while holding queue_lock. If it doesn't exist,
+ * it will be created after releasing queue_lock.
+ */
if (blk_rq_should_init_elevator(bio) &&
!test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
rw_flags |= REQ_ELVPRIV;
rl->elvpriv++;
+ if (et->icq_cache && ioc)
+ icq = ioc_lookup_icq(ioc, q);
}
if (blk_queue_io_stat(q))
rw_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);
- rq = blk_alloc_request(q, rw_flags, gfp_mask);
+ /* create icq if missing */
+ if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
+ icq = ioc_create_icq(q, gfp_mask);
+ if (!icq)
+ goto fail_icq;
+ }
+
+ rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
+fail_icq:
if (unlikely(!rq)) {
/*
* Allocation failed presumably due to memory. Undo anything
@@ -871,10 +942,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
rq = get_request(q, rw_flags, bio, GFP_NOIO);
while (!rq) {
DEFINE_WAIT(wait);
- struct io_context *ioc;
struct request_list *rl = &q->rq;
- if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+ if (unlikely(blk_queue_dead(q)))
return NULL;
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
@@ -891,8 +961,8 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
* up to a big batch of them for a small period time.
* See ioc_batching, ioc_set_batching
*/
- ioc = current_io_context(GFP_NOIO, q->node);
- ioc_set_batching(q, ioc);
+ create_io_context(current, GFP_NOIO, q->node);
+ ioc_set_batching(q, current->io_context);
spin_lock_irq(q->queue_lock);
finish_wait(&rl->wait[is_sync], &wait);
@@ -1009,54 +1079,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
__elv_add_request(q, rq, where);
}
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q: request queue where request should be inserted
- * @rq: request to be inserted
- * @at_head: insert request at head or tail of queue
- * @data: private data
- *
- * Description:
- * Many block devices need to execute commands asynchronously, so they don't
- * block the whole kernel from preemption during request execution. This is
- * accomplished normally by inserting aritficial requests tagged as
- * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- * be scheduled for actual execution by the request queue.
- *
- * We have the option of inserting the head or the tail of the queue.
- * Typically we use the tail for new ioctls and so forth. We use the head
- * of the queue for things like a QUEUE_FULL message from a device, or a
- * host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
- int at_head, void *data)
-{
- int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
- unsigned long flags;
-
- /*
- * tell I/O scheduler that this isn't a regular read/write (ie it
- * must not attempt merges on this) and that it acts as a soft
- * barrier
- */
- rq->cmd_type = REQ_TYPE_SPECIAL;
-
- rq->special = data;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- /*
- * If command is tagged, release the tag
- */
- if (blk_rq_tagged(rq))
- blk_queue_end_tag(q, rq);
-
- add_acct_request(q, rq, where);
- __blk_run_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
@@ -1190,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1221,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
- elv_bio_merged(q, req, bio);
return true;
}
@@ -1235,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* on %current's plugged list. Returns %true if merge was successful,
* otherwise %false.
*
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock. Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock. As such it's more of an issuing mechanism
+ * than scheduling, and the request, while it may have elvpriv data, is not
+ * added to the elevator at this point. In addition, we don't have
+ * reliable access to the elevator outside queue lock. Only check basic
+ * merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
@@ -1260,10 +1279,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
(*request_count)++;
- if (rq->q != q)
+ if (rq->q != q || !blk_rq_merge_ok(rq, bio))
continue;
- el_ret = elv_try_merge(rq, bio);
+ el_ret = blk_try_merge(rq, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
ret = bio_attempt_back_merge(q, rq, bio);
if (ret)
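
blk_rq_merge_ok() plus blk_try_merge() let the plug path pick a front or back merge from request/bio geometry alone, without touching the elevator. A simplified, position-only sketch of that decision (the real helpers also reject requests and bios with incompatible flags, e.g. discard vs. normal I/O):

    static int try_merge_position(struct request *rq, struct bio *bio)
    {
        if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
            return ELEVATOR_BACK_MERGE;    /* bio starts right where rq ends */
        if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
            return ELEVATOR_FRONT_MERGE;   /* bio ends right where rq begins */
        return ELEVATOR_NO_MERGE;
    }
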
@@ -1325,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
}
} else if (el_ret == ELEVATOR_FRONT_MERGE) {
if (bio_attempt_front_merge(q, req, bio)) {
+ elv_bio_merged(q, req, bio);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
goto out_unlock;
@@ -1766,6 +1787,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
return -EIO;
spin_lock_irqsave(q->queue_lock, flags);
+ if (unlikely(blk_queue_dead(q))) {
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ return -ENODEV;
+ }
/*
* Submitting request must be dequeued before calling this function
@@ -2740,6 +2765,14 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
trace_block_unplug(q, depth, !from_schedule);
/*
+ * Don't mess with dead queue.
+ */
+ if (unlikely(blk_queue_dead(q))) {
+ spin_unlock(q->queue_lock);
+ return;
+ }
+
+ /*
* If we are punting this to kblockd, then we can safely drop
* the queue_lock before waking kblockd (which needs to take
* this lock).
@@ -2815,6 +2848,15 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
depth = 0;
spin_lock(q->queue_lock);
}
+
+ /*
+ * Short-circuit if @q is dead
+ */
+ if (unlikely(blk_queue_dead(q))) {
+ __blk_end_request_all(rq, -ENODEV);
+ continue;
+ }
+
/*
* rq is already accounted, so use raw insert
*/