summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c148
-rw-r--r--block/blk-cgroup.h128
-rw-r--r--block/blk-core.c274
-rw-r--r--block/blk-exec.c11
-rw-r--r--block/blk-ioc.c1
-rw-r--r--block/blk-settings.c3
-rw-r--r--block/blk-sysfs.c34
-rw-r--r--block/blk-throttle.c3
-rw-r--r--block/blk-timeout.c41
-rw-r--r--block/blk.h4
-rw-r--r--block/bsg-lib.c53
-rw-r--r--block/cfq-iosched.c30
-rw-r--r--block/genhd.c20
-rw-r--r--block/ioctl.c59
-rw-r--r--block/partition-generic.c4
-rw-r--r--block/scsi_ioctl.c5
16 files changed, 506 insertions, 312 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 02cf6335e9bd..f3b44a65fc7a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -31,27 +31,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
-struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
-{
- return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
- struct blkcg, css);
-}
-EXPORT_SYMBOL_GPL(cgroup_to_blkcg);
-
-static struct blkcg *task_blkcg(struct task_struct *tsk)
-{
- return container_of(task_subsys_state(tsk, blkio_subsys_id),
- struct blkcg, css);
-}
-
-struct blkcg *bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_css)
- return container_of(bio->bi_css, struct blkcg, css);
- return task_blkcg(current);
-}
-EXPORT_SYMBOL_GPL(bio_blkcg);
-
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -84,6 +63,7 @@ static void blkg_free(struct blkcg_gq *blkg)
kfree(pd);
}
+ blk_exit_rl(&blkg->rl);
kfree(blkg);
}
@@ -91,16 +71,18 @@ static void blkg_free(struct blkcg_gq *blkg)
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
* @q: request_queue the new blkg is associated with
+ * @gfp_mask: allocation mask to use
*
* Allocate a new blkg assocating @blkcg and @q.
*/
-static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
+static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
+ gfp_t gfp_mask)
{
struct blkcg_gq *blkg;
int i;
/* alloc and init base part */
- blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
+ blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
if (!blkg)
return NULL;
@@ -109,6 +91,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
blkg->blkcg = blkcg;
blkg->refcnt = 1;
+ /* root blkg uses @q->root_rl, init rl only for !root blkgs */
+ if (blkcg != &blkcg_root) {
+ if (blk_init_rl(&blkg->rl, q, gfp_mask))
+ goto err_free;
+ blkg->rl.blkg = blkg;
+ }
+
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkg_policy_data *pd;
@@ -117,25 +106,23 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
continue;
/* alloc per-policy data and attach it to blkg */
- pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node);
- if (!pd) {
- blkg_free(blkg);
- return NULL;
- }
+ pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
+ if (!pd)
+ goto err_free;
blkg->pd[i] = pd;
pd->blkg = blkg;
- }
-
- /* invoke per-policy init */
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
+ /* invoke per-policy init */
if (blkcg_policy_enabled(blkg->q, pol))
pol->pd_init_fn(blkg);
}
return blkg;
+
+err_free:
+ blkg_free(blkg);
+ return NULL;
}
static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
@@ -179,9 +166,13 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blkg_lookup);
+/*
+ * If @new_blkg is %NULL, this function tries to allocate a new one as
+ * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return.
+ */
static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q)
- __releases(q->queue_lock) __acquires(q->queue_lock)
+ struct request_queue *q,
+ struct blkcg_gq *new_blkg)
{
struct blkcg_gq *blkg;
int ret;
@@ -193,24 +184,26 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
blkg = __blkg_lookup(blkcg, q);
if (blkg) {
rcu_assign_pointer(blkcg->blkg_hint, blkg);
- return blkg;
+ goto out_free;
}
/* blkg holds a reference to blkcg */
- if (!css_tryget(&blkcg->css))
- return ERR_PTR(-EINVAL);
+ if (!css_tryget(&blkcg->css)) {
+ blkg = ERR_PTR(-EINVAL);
+ goto out_free;
+ }
/* allocate */
- ret = -ENOMEM;
- blkg = blkg_alloc(blkcg, q);
- if (unlikely(!blkg))
- goto err_put;
+ if (!new_blkg) {
+ new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
+ if (unlikely(!new_blkg)) {
+ blkg = ERR_PTR(-ENOMEM);
+ goto out_put;
+ }
+ }
+ blkg = new_blkg;
/* insert */
- ret = radix_tree_preload(GFP_ATOMIC);
- if (ret)
- goto err_free;
-
spin_lock(&blkcg->lock);
ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
if (likely(!ret)) {
@@ -219,15 +212,15 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
}
spin_unlock(&blkcg->lock);
- radix_tree_preload_end();
-
if (!ret)
return blkg;
-err_free:
- blkg_free(blkg);
-err_put:
+
+ blkg = ERR_PTR(ret);
+out_put:
css_put(&blkcg->css);
- return ERR_PTR(ret);
+out_free:
+ blkg_free(new_blkg);
+ return blkg;
}
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
@@ -239,16 +232,15 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
*/
if (unlikely(blk_queue_bypass(q)))
return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
- return __blkg_lookup_create(blkcg, q);
+ return __blkg_lookup_create(blkcg, q, NULL);
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
static void blkg_destroy(struct blkcg_gq *blkg)
{
- struct request_queue *q = blkg->q;
struct blkcg *blkcg = blkg->blkcg;
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
/* Something wrong if we are trying to remove same group twice */
@@ -318,6 +310,38 @@ void __blkg_release(struct blkcg_gq *blkg)
}
EXPORT_SYMBOL_GPL(__blkg_release);
+/*
+ * The next function used by blk_queue_for_each_rl(). It's a bit tricky
+ * because the root blkg uses @q->root_rl instead of its own rl.
+ */
+struct request_list *__blk_queue_next_rl(struct request_list *rl,
+ struct request_queue *q)
+{
+ struct list_head *ent;
+ struct blkcg_gq *blkg;
+
+ /*
+ * Determine the current blkg list_head. The first entry is
+ * root_rl which is off @q->blkg_list and mapped to the head.
+ */
+ if (rl == &q->root_rl) {
+ ent = &q->blkg_list;
+ } else {
+ blkg = container_of(rl, struct blkcg_gq, rl);
+ ent = &blkg->q_node;
+ }
+
+ /* walk to the next list_head, skip root blkcg */
+ ent = ent->next;
+ if (ent == &q->root_blkg->q_node)
+ ent = ent->next;
+ if (ent == &q->blkg_list)
+ return NULL;
+
+ blkg = container_of(ent, struct blkcg_gq, q_node);
+ return &blkg->rl;
+}
+
static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
u64 val)
{
@@ -739,24 +763,36 @@ int blkcg_activate_policy(struct request_queue *q,
struct blkcg_gq *blkg;
struct blkg_policy_data *pd, *n;
int cnt = 0, ret;
+ bool preloaded;
if (blkcg_policy_enabled(q, pol))
return 0;
+ /* preallocations for root blkg */
+ blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+ if (!blkg)
+ return -ENOMEM;
+
+ preloaded = !radix_tree_preload(GFP_KERNEL);
+
blk_queue_bypass_start(q);
/* make sure the root blkg exists and count the existing blkgs */
spin_lock_irq(q->queue_lock);
rcu_read_lock();
- blkg = __blkg_lookup_create(&blkcg_root, q);
+ blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
rcu_read_unlock();
+ if (preloaded)
+ radix_tree_preload_end();
+
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto out_unlock;
}
q->root_blkg = blkg;
+ q->root_rl.blkg = blkg;
list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 8ac457ce7783..24597309e23d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -17,6 +17,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
+#include <linux/blkdev.h>
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX UINT_MAX
@@ -93,6 +94,8 @@ struct blkcg_gq {
struct list_head q_node;
struct hlist_node blkcg_node;
struct blkcg *blkcg;
+ /* request allocation list for this blkcg-q pair */
+ struct request_list rl;
/* reference count */
int refcnt;
@@ -120,8 +123,6 @@ struct blkcg_policy {
extern struct blkcg blkcg_root;
-struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup);
-struct blkcg *bio_blkcg(struct bio *bio);
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
@@ -160,6 +161,25 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup)
+{
+ return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
+ struct blkcg, css);
+}
+
+static inline struct blkcg *task_blkcg(struct task_struct *tsk)
+{
+ return container_of(task_subsys_state(tsk, blkio_subsys_id),
+ struct blkcg, css);
+}
+
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
+ if (bio && bio->bi_css)
+ return container_of(bio->bi_css, struct blkcg, css);
+ return task_blkcg(current);
+}
+
/**
* blkg_to_pdata - get policy private data
* @blkg: blkg of interest
@@ -234,6 +254,95 @@ static inline void blkg_put(struct blkcg_gq *blkg)
}
/**
+ * blk_get_rl - get request_list to use
+ * @q: request_queue of interest
+ * @bio: bio which will be attached to the allocated request (may be %NULL)
+ *
+ * The caller wants to allocate a request from @q to use for @bio. Find
+ * the request_list to use and obtain a reference on it. Should be called
+ * under queue_lock. This function is guaranteed to return non-%NULL
+ * request_list.
+ */
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+ struct bio *bio)
+{
+ struct blkcg *blkcg;
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+
+ blkcg = bio_blkcg(bio);
+
+ /* bypass blkg lookup and use @q->root_rl directly for root */
+ if (blkcg == &blkcg_root)
+ goto root_rl;
+
+ /*
+ * Try to use blkg->rl. blkg lookup may fail under memory pressure
+ * or if either the blkcg or queue is going away. Fall back to
+ * root_rl in such cases.
+ */
+ blkg = blkg_lookup_create(blkcg, q);
+ if (unlikely(IS_ERR(blkg)))
+ goto root_rl;
+
+ blkg_get(blkg);
+ rcu_read_unlock();
+ return &blkg->rl;
+root_rl:
+ rcu_read_unlock();
+ return &q->root_rl;
+}
+
+/**
+ * blk_put_rl - put request_list
+ * @rl: request_list to put
+ *
+ * Put the reference acquired by blk_get_rl(). Should be called under
+ * queue_lock.
+ */
+static inline void blk_put_rl(struct request_list *rl)
+{
+ /* root_rl may not have blkg set */
+ if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+ blkg_put(rl->blkg);
+}
+
+/**
+ * blk_rq_set_rl - associate a request with a request_list
+ * @rq: request of interest
+ * @rl: target request_list
+ *
+ * Associate @rq with @rl so that accounting and freeing can know the
+ * request_list @rq came from.
+ */
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl)
+{
+ rq->rl = rl;
+}
+
+/**
+ * blk_rq_rl - return the request_list a request came from
+ * @rq: request of interest
+ *
+ * Return the request_list @rq is allocated from.
+ */
+static inline struct request_list *blk_rq_rl(struct request *rq)
+{
+ return rq->rl;
+}
+
+struct request_list *__blk_queue_next_rl(struct request_list *rl,
+ struct request_queue *q);
+/**
+ * blk_queue_for_each_rl - iterate through all request_lists of a request_queue
+ *
+ * Should be used under queue_lock.
+ */
+#define blk_queue_for_each_rl(rl, q) \
+ for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
+
+/**
* blkg_stat_add - add a value to a blkg_stat
* @stat: target blkg_stat
* @val: value to add
@@ -351,6 +460,7 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
#else /* CONFIG_BLK_CGROUP */
struct cgroup;
+struct blkcg;
struct blkg_policy_data {
};
@@ -361,8 +471,6 @@ struct blkcg_gq {
struct blkcg_policy {
};
-static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
-static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
@@ -374,6 +482,9 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
+static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; }
+static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
+
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -381,5 +492,14 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
+static inline struct request_list *blk_get_rl(struct request_queue *q,
+ struct bio *bio) { return &q->root_rl; }
+static inline void blk_put_rl(struct request_list *rl) { }
+static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
+static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
+
+#define blk_queue_for_each_rl(rl, q) \
+ for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
+
#endif /* CONFIG_BLK_CGROUP */
#endif /* _BLK_CGROUP_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c923a7aeb56..4b4dbdfbca89 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -361,9 +361,10 @@ EXPORT_SYMBOL(blk_put_queue);
*/
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
+ int i;
+
while (true) {
bool drain = false;
- int i;
spin_lock_irq(q->queue_lock);
@@ -386,7 +387,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (!list_empty(&q->queue_head) && q->request_fn)
__blk_run_queue(q);
- drain |= q->rq.elvpriv;
+ drain |= q->nr_rqs_elvpriv;
/*
* Unfortunately, requests are queued at and tracked from
@@ -396,7 +397,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (drain_all) {
drain |= !list_empty(&q->queue_head);
for (i = 0; i < 2; i++) {
- drain |= q->rq.count[i];
+ drain |= q->nr_rqs[i];
drain |= q->in_flight[i];
drain |= !list_empty(&q->flush_queue[i]);
}
@@ -408,6 +409,23 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
break;
msleep(10);
}
+
+ /*
+ * With queue marked dead, any woken up waiter will fail the
+ * allocation path, so the wakeup chaining is lost and we're
+ * left with hung waiters. We need to wake up those waiters.
+ */
+ if (q->request_fn) {
+ struct request_list *rl;
+
+ spin_lock_irq(q->queue_lock);
+
+ blk_queue_for_each_rl(rl, q)
+ for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
+ wake_up_all(&rl->wait[i]);
+
+ spin_unlock_irq(q->queue_lock);
+ }
}
/**
@@ -467,7 +485,6 @@ void blk_cleanup_queue(struct request_queue *q)
/* mark @q DEAD, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
-
spin_lock_irq(lock);
/*
@@ -485,10 +502,6 @@ void blk_cleanup_queue(struct request_queue *q)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
queue_flag_set(QUEUE_FLAG_DEAD, q);
-
- if (q->queue_lock != &q->__queue_lock)
- q->queue_lock = &q->__queue_lock;
-
spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
@@ -499,33 +512,43 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
blk_sync_queue(q);
+ spin_lock_irq(lock);
+ if (q->queue_lock != &q->__queue_lock)
+ q->queue_lock = &q->__queue_lock;
+ spin_unlock_irq(lock);
+
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);
-static int blk_init_free_list(struct request_queue *q)
+int blk_init_rl(struct request_list *rl, struct request_queue *q,
+ gfp_t gfp_mask)
{
- struct request_list *rl = &q->rq;
-
if (unlikely(rl->rq_pool))
return 0;
+ rl->q = q;
rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
- rl->elvpriv = 0;
init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
- mempool_free_slab, request_cachep, q->node);
-
+ mempool_free_slab, request_cachep,
+ gfp_mask, q->node);
if (!rl->rq_pool)
return -ENOMEM;
return 0;
}
+void blk_exit_rl(struct request_list *rl)
+{
+ if (rl->rq_pool)
+ mempool_destroy(rl->rq_pool);
+}
+
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
return blk_alloc_queue_node(gfp_mask, -1);
@@ -667,7 +690,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
if (!q)
return NULL;
- if (blk_init_free_list(q))
+ if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
return NULL;
q->request_fn = rfn;
@@ -709,15 +732,15 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
-static inline void blk_free_request(struct request_queue *q, struct request *rq)
+static inline void blk_free_request(struct request_list *rl, struct request *rq)
{
if (rq->cmd_flags & REQ_ELVPRIV) {
- elv_put_request(q, rq);
+ elv_put_request(rl->q, rq);
if (rq->elv.icq)
put_io_context(rq->elv.icq->ioc);
}
- mempool_free(rq, q->rq.rq_pool);
+ mempool_free(rq, rl->rq_pool);
}
/*
@@ -754,18 +777,23 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
ioc->last_waited = jiffies;
}
-static void __freed_request(struct request_queue *q, int sync)
+static void __freed_request(struct request_list *rl, int sync)
{
- struct request_list *rl = &q->rq;
+ struct request_queue *q = rl->q;
- if (rl->count[sync] < queue_congestion_off_threshold(q))
+ /*
+ * bdi isn't aware of blkcg yet. As all async IOs end up root
+ * blkcg anyway, just use root blkcg state.
+ */
+ if (rl == &q->root_rl &&
+ rl->count[sync] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, sync);
if (rl->count[sync] + 1 <= q->nr_requests) {
if (waitqueue_active(&rl->wait[sync]))
wake_up(&rl->wait[sync]);
- blk_clear_queue_full(q, sync);
+ blk_clear_rl_full(rl, sync);
}
}
@@ -773,19 +801,20 @@ static void __freed_request(struct request_queue *q, int sync)
* A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock.
*/
-static void freed_request(struct request_queue *q, unsigned int flags)
+static void freed_request(struct request_list *rl, unsigned int flags)
{
- struct request_list *rl = &q->rq;
+ struct request_queue *q = rl->q;
int sync = rw_is_sync(flags);
+ q->nr_rqs[sync]--;
rl->count[sync]--;
if (flags & REQ_ELVPRIV)
- rl->elvpriv--;
+ q->nr_rqs_elvpriv--;
- __freed_request(q, sync);
+ __freed_request(rl, sync);
if (unlikely(rl->starved[sync ^ 1]))
- __freed_request(q, sync ^ 1);
+ __freed_request(rl, sync ^ 1);
}
/*
@@ -824,8 +853,8 @@ static struct io_context *rq_ioc(struct bio *bio)
}
/**
- * get_request - get a free request
- * @q: request_queue to allocate request from
+ * __get_request - get a free request
+ * @rl: request list to allocate from
* @rw_flags: RW and SYNC flags
* @bio: bio to allocate request for (can be %NULL)
* @gfp_mask: allocation mask
@@ -837,20 +866,16 @@ static struct io_context *rq_ioc(struct bio *bio)
* Returns %NULL on failure, with @q->queue_lock held.
* Returns !%NULL on success, with @q->queue_lock *not held*.
*/
-static struct request *get_request(struct request_queue *q, int rw_flags,
- struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int rw_flags,
+ struct bio *bio, gfp_t gfp_mask)
{
+ struct request_queue *q = rl->q;
struct request *rq;
- struct request_list *rl = &q->rq;
- struct elevator_type *et;
- struct io_context *ioc;
+ struct elevator_type *et = q->elevator->type;
+ struct io_context *ioc = rq_ioc(bio);
struct io_cq *icq = NULL;
const bool is_sync = rw_is_sync(rw_flags) != 0;
- bool retried = false;
int may_queue;
-retry:
- et = q->elevator->type;
- ioc = rq_ioc(bio);
if (unlikely(blk_queue_dead(q)))
return NULL;
@@ -862,28 +887,14 @@ retry:
if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
if (rl->count[is_sync]+1 >= q->nr_requests) {
/*
- * We want ioc to record batching state. If it's
- * not already there, creating a new one requires
- * dropping queue_lock, which in turn requires
- * retesting conditions to avoid queue hang.
- */
- if (!ioc && !retried) {
- spin_unlock_irq(q->queue_lock);
- create_io_context(gfp_mask, q->node);
- spin_lock_irq(q->queue_lock);
- retried = true;
- goto retry;
- }
-
- /*
* The queue will fill after this allocation, so set
* it as full, and mark this process as "batching".
* This process will be allowed to complete a batch of
* requests, others will be blocked.
*/
- if (!blk_queue_full(q, is_sync)) {
+ if (!blk_rl_full(rl, is_sync)) {
ioc_set_batching(q, ioc);
- blk_set_queue_full(q, is_sync);
+ blk_set_rl_full(rl, is_sync);
} else {
if (may_queue != ELV_MQUEUE_MUST
&& !ioc_batching(q, ioc)) {
@@ -896,7 +907,12 @@ retry:
}
}
}
- blk_set_queue_congested(q, is_sync);
+ /*
+ * bdi isn't aware of blkcg yet. As all async IOs end up
+ * root blkcg anyway, just use root blkcg state.
+ */
+ if (rl == &q->root_rl)
+ blk_set_queue_congested(q, is_sync);
}
/*
@@ -907,6 +923,7 @@ retry:
if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
return NULL;
+ q->nr_rqs[is_sync]++;
rl->count[is_sync]++;
rl->starved[is_sync] = 0;
@@ -922,7 +939,7 @@ retry:
*/
if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
rw_flags |= REQ_ELVPRIV;
- rl->elvpriv++;
+ q->nr_rqs_elvpriv++;
if (et->icq_cache && ioc)
icq = ioc_lookup_icq(ioc, q);
}
@@ -932,22 +949,19 @@ retry:
spin_unlock_irq(q->queue_lock);
/* allocate and init request */
- rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
+ rq = mempool_alloc(rl->rq_pool, gfp_mask);
if (!rq)
goto fail_alloc;
blk_rq_init(q, rq);
+ blk_rq_set_rl(rq, rl);
rq->cmd_flags = rw_flags | REQ_ALLOCED;
/* init elvpriv */
if (rw_flags & REQ_ELVPRIV) {
if (unlikely(et->icq_cache && !icq)) {
- create_io_context(gfp_mask, q->node);
- ioc = rq_ioc(bio);
- if (!ioc)
- goto fail_elvpriv;
-
- icq = ioc_create_icq(ioc, q, gfp_mask);
+ if (ioc)
+ icq = ioc_create_icq(ioc, q, gfp_mask);
if (!icq)
goto fail_elvpriv;
}
@@ -987,7 +1001,7 @@ fail_elvpriv:
rq->elv.icq = NULL;
spin_lock_irq(q->queue_lock);
- rl->elvpriv--;
+ q->nr_rqs_elvpriv--;
spin_unlock_irq(q->queue_lock);
goto out;
@@ -1000,7 +1014,7 @@ fail_alloc:
* queue, but this is pretty rare.
*/
spin_lock_irq(q->queue_lock);
- freed_request(q, rw_flags);
+ freed_request(rl, rw_flags);
/*
* in the very unlikely event that allocation failed and no
@@ -1016,56 +1030,58 @@ rq_starved:
}
/**
- * get_request_wait - get a free request with retry
+ * get_request - get a free request
* @q: request_queue to allocate request from
* @rw_flags: RW and SYNC flags
* @bio: bio to allocate request for (can be %NULL)
+ * @gfp_mask: allocation mask
*
- * Get a free request from @q. This function keeps retrying under memory
- * pressure and fails iff @q is dead.
+ * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this
+ * function keeps retrying under memory pressure and fails iff @q is dead.
*
* Must be callled with @q->queue_lock held and,
* Returns %NULL on failure, with @q->queue_lock held.
* Returns !%NULL on success, with @q->queue_lock *not held*.
*/
-static struct request *get_request_wait(struct request_queue *q, int rw_flags,
- struct bio *bio)
+static struct request *get_request(struct request_queue *q, int rw_flags,
+ struct bio *bio, gfp_t gfp_mask)
{
const bool is_sync = rw_is_sync(rw_flags) != 0;
+ DEFINE_WAIT(wait);
+ struct request_list *rl;
struct request *rq;
- rq = get_request(q, rw_flags, bio, GFP_NOIO);
- while (!rq) {
- DEFINE_WAIT(wait);
- struct request_list *rl = &q->rq;
-
- if (unlikely(blk_queue_dead(q)))
- return NULL;
+ rl = blk_get_rl(q, bio); /* transferred to @rq on success */
+retry:
+ rq = __get_request(rl, rw_flags, bio, gfp_mask);
+ if (rq)
+ return rq;
- prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
- TASK_UNINTERRUPTIBLE);
+ if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+ blk_put_rl(rl);
+ return NULL;
+ }
- trace_block_sleeprq(q, bio, rw_flags & 1);
+ /* wait on @rl and retry */
+ prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
+ TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(q->queue_lock);
- io_schedule();
+ trace_block_sleeprq(q, bio, rw_flags & 1);
- /*
- * After sleeping, we become a "batching" process and
- * will be able to allocate at least one request, and
- * up to a big batch of them for a small period time.
- * See ioc_batching, ioc_set_batching
- */
- create_io_context(GFP_NOIO, q->node);
- ioc_set_batching(q, current->io_context);
+ spin_unlock_irq(q->queue_lock);
+ io_schedule();
- spin_lock_irq(q->queue_lock);
- finish_wait(&rl->wait[is_sync], &wait);
+ /*
+ * After sleeping, we become a "batching" process and will be able
+ * to allocate at least one request, and up to a big batch of them
+ * for a small period time. See ioc_batching, ioc_set_batching
+ */
+ ioc_set_batching(q, current->io_context);
- rq = get_request(q, rw_flags, bio, GFP_NOIO);
- };
+ spin_lock_irq(q->queue_lock);
+ finish_wait(&rl->wait[is_sync], &wait);
- return rq;
+ goto retry;
}
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
@@ -1074,11 +1090,11 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
BUG_ON(rw != READ && rw != WRITE);
+ /* create ioc upfront */
+ create_io_context(gfp_mask, q->node);
+
spin_lock_irq(q->queue_lock);
- if (gfp_mask & __GFP_WAIT)
- rq = get_request_wait(q, rw, NULL);
- else
- rq = get_request(q, rw, NULL, gfp_mask);
+ rq = get_request(q, rw, NULL, gfp_mask);
if (!rq)
spin_unlock_irq(q->queue_lock);
/* q->queue_lock is unlocked at this point */
@@ -1235,12 +1251,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
*/
if (req->cmd_flags & REQ_ALLOCED) {
unsigned int flags = req->cmd_flags;
+ struct request_list *rl = blk_rq_rl(req);
BUG_ON(!list_empty(&req->queuelist));
BUG_ON(!hlist_unhashed(&req->hash));
- blk_free_request(q, req);
- freed_request(q, flags);
+ blk_free_request(rl, req);
+ freed_request(rl, flags);
+ blk_put_rl(rl);
}
}
EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1468,7 +1486,7 @@ get_rq:
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
- req = get_request_wait(q, rw_flags, bio);
+ req = get_request(q, rw_flags, bio, GFP_NOIO);
if (unlikely(!req)) {
bio_endio(bio, -ENODEV); /* @q is dead */
goto out_unlock;
@@ -1689,6 +1707,14 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
+ /*
+ * Various block parts want %current->io_context and lazy ioc
+ * allocation ends up trading a lot of pain for a small amount of
+ * memory. Just allocate it upfront. This may fail and block
+ * layer knows how to live with it.
+ */
+ create_io_context(GFP_ATOMIC, q->node);
+
if (blk_throtl_bio(q, bio))
return false; /* throttled, will be resubmitted later */
@@ -2883,23 +2909,47 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
}
-static void flush_plug_callbacks(struct blk_plug *plug)
+static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
LIST_HEAD(callbacks);
- if (list_empty(&plug->cb_list))
- return;
-
- list_splice_init(&plug->cb_list, &callbacks);
+ while (!list_empty(&plug->cb_list)) {
+ list_splice_init(&plug->cb_list, &callbacks);
- while (!list_empty(&callbacks)) {
- struct blk_plug_cb *cb = list_first_entry(&callbacks,
+ while (!list_empty(&callbacks)) {
+ struct blk_plug_cb *cb = list_first_entry(&callbacks,
struct blk_plug_cb,
list);
- list_del(&cb->list);
- cb->callback(cb);
+ list_del(&cb->list);
+ cb->callback(cb, from_schedule);
+ }
+ }
+}
+
+struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
+ int size)
+{
+ struct blk_plug *plug = current->plug;
+ struct blk_plug_cb *cb;
+
+ if (!plug)
+ return NULL;
+
+ list_for_each_entry(cb, &plug->cb_list, list)
+ if (cb->callback == unplug && cb->data == data)
+ return cb;
+
+ /* Not currently on the callback list */
+ BUG_ON(size < sizeof(*cb));
+ cb = kzalloc(size, GFP_ATOMIC);
+ if (cb) {
+ cb->data = data;
+ cb->callback = unplug;
+ list_add(&cb->list, &plug->cb_list);
}
+ return cb;
}
+EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
@@ -2911,7 +2961,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
BUG_ON(plug->magic != PLUG_MAGIC);
- flush_plug_callbacks(plug);
+ flush_plug_callbacks(plug, from_schedule);
if (list_empty(&plug->list))
return;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index fb2cbd551621..8b6dc5bd4dd0 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -43,6 +43,9 @@ static void blk_end_sync_rq(struct request *rq, int error)
* Description:
* Insert a fully prepared request at the back of the I/O scheduler queue
* for execution. Don't wait for completion.
+ *
+ * Note:
+ * This function will invoke @done directly if the queue is dead.
*/
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
struct request *rq, int at_head,
@@ -51,18 +54,20 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
WARN_ON(irqs_disabled());
+
+ rq->rq_disk = bd_disk;
+ rq->end_io = done;
+
spin_lock_irq(q->queue_lock);
if (unlikely(blk_queue_dead(q))) {
- spin_unlock_irq(q->queue_lock);
rq->errors = -ENXIO;
if (rq->end_io)
rq->end_io(rq, rq->errors);
+ spin_unlock_irq(q->queue_lock);
return;
}
- rq->rq_disk = bd_disk;
- rq->end_io = done;
__elv_add_request(q, rq, where);
__blk_run_queue(q);
/* the queue is stopped so it won't be run */
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 893b8007c657..fab4cdd3f7bb 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -244,6 +244,7 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
/* initialize */
atomic_long_set(&ioc->refcount, 1);
+ atomic_set(&ioc->nr_tasks, 1);
atomic_set(&ioc->active_ref, 1);
spin_lock_init(&ioc->lock);
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc494ad..565a6786032f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -143,8 +143,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->discard_zeroes_data = 1;
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
-
- lim->max_sectors = BLK_DEF_MAX_SECTORS;
+ lim->max_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index aa41b47c22d2..9628b291f960 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -40,7 +40,7 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
static ssize_t
queue_requests_store(struct request_queue *q, const char *page, size_t count)
{
- struct request_list *rl = &q->rq;
+ struct request_list *rl;
unsigned long nr;
int ret;
@@ -55,6 +55,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
q->nr_requests = nr;
blk_queue_congestion_threshold(q);
+ /* congestion isn't cgroup aware and follows root blkcg for now */
+ rl = &q->root_rl;
+
if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
blk_set_queue_congested(q, BLK_RW_SYNC);
else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
@@ -65,19 +68,22 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
blk_clear_queue_congested(q, BLK_RW_ASYNC);
- if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
- blk_set_queue_full(q, BLK_RW_SYNC);
- } else {
- blk_clear_queue_full(q, BLK_RW_SYNC);
- wake_up(&rl->wait[BLK_RW_SYNC]);
+ blk_queue_for_each_rl(rl, q) {
+ if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+ blk_set_rl_full(rl, BLK_RW_SYNC);
+ } else {
+ blk_clear_rl_full(rl, BLK_RW_SYNC);
+ wake_up(&rl->wait[BLK_RW_SYNC]);
+ }
+
+ if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+ blk_set_rl_full(rl, BLK_RW_ASYNC);
+ } else {
+ blk_clear_rl_full(rl, BLK_RW_ASYNC);
+ wake_up(&rl->wait[BLK_RW_ASYNC]);
+ }
}
- if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
- blk_set_queue_full(q, BLK_RW_ASYNC);
- } else {
- blk_clear_queue_full(q, BLK_RW_ASYNC);
- wake_up(&rl->wait[BLK_RW_ASYNC]);
- }
spin_unlock_irq(q->queue_lock);
return ret;
}
@@ -476,7 +482,6 @@ static void blk_release_queue(struct kobject *kobj)
{
struct request_queue *q =
container_of(kobj, struct request_queue, kobj);
- struct request_list *rl = &q->rq;
blk_sync_queue(q);
@@ -489,8 +494,7 @@ static void blk_release_queue(struct kobject *kobj)
elevator_exit(q->elevator);
}
- if (rl->rq_pool)
- mempool_destroy(rl->rq_pool);
+ blk_exit_rl(&q->root_rl);
if (q->queue_tags)
__blk_queue_free_tags(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5b0659512047..e287c19908c8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1123,9 +1123,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
goto out;
}
- /* bio_associate_current() needs ioc, try creating */
- create_io_context(GFP_ATOMIC, q->node);
-
/*
* A throtl_grp pointer retrieved under rcu can be used to access
* basic fields like stats and io rates. If a group has no rules,
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 780354888958..6e4744cbfb56 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -197,44 +197,3 @@ void blk_add_timer(struct request *req)
mod_timer(&q->timeout, expiry);
}
-/**
- * blk_abort_queue -- Abort all request on given queue
- * @queue: pointer to queue
- *
- */
-void blk_abort_queue(struct request_queue *q)
-{
- unsigned long flags;
- struct request *rq, *tmp;
- LIST_HEAD(list);
-
- /*
- * Not a request based block device, nothing to abort
- */
- if (!q->request_fn)
- return;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- elv_abort_queue(q);
-
- /*
- * Splice entries to local list, to avoid deadlocking if entries
- * get readded to the timeout list by error handling
- */
- list_splice_init(&q->timeout_list, &list);
-
- list_for_each_entry_safe(rq, tmp, &list, timeout_list)
- blk_abort_request(rq);
-
- /*
- * Occasionally, blk_abort_request() will return without
- * deleting the element from the list. Make sure we add those back
- * instead of leaving them on the local stack list.
- */
- list_splice(&list, &q->timeout_list);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
-}
-EXPORT_SYMBOL_GPL(blk_abort_queue);
diff --git a/block/blk.h b/block/blk.h
index 85f6ae42f7d3..2a0ea32d249f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -18,6 +18,9 @@ static inline void __blk_get_queue(struct request_queue *q)
kobject_get(&q->kobj);
}
+int blk_init_rl(struct request_list *rl, struct request_queue *q,
+ gfp_t gfp_mask);
+void blk_exit_rl(struct request_list *rl);
void init_request_from_bio(struct request *req, struct bio *bio);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
@@ -33,7 +36,6 @@ bool __blk_end_bidi_request(struct request *rq, int error,
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
-void __generic_unplug_device(struct request_queue *);
/*
* Internal atomic flags for request handling
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 7ad49c88f6b1..deee61fbb741 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -243,56 +243,3 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q,
return 0;
}
EXPORT_SYMBOL_GPL(bsg_setup_queue);
-
-/**
- * bsg_remove_queue - Deletes the bsg dev from the q
- * @q: the request_queue that is to be torn down.
- *
- * Notes:
- * Before unregistering the queue empty any requests that are blocked
- */
-void bsg_remove_queue(struct request_queue *q)
-{
- struct request *req; /* block request */
- int counts; /* totals for request_list count and starved */
-
- if (!q)
- return;
-
- /* Stop taking in new requests */
- spin_lock_irq(q->queue_lock);
- blk_stop_queue(q);
-
- /* drain all requests in the queue */
- while (1) {
- /* need the lock to fetch a request
- * this may fetch the same reqeust as the previous pass
- */
- req = blk_fetch_request(q);
- /* save requests in use and starved */
- counts = q->rq.count[0] + q->rq.count[1] +
- q->rq.starved[0] + q->rq.starved[1];
- spin_unlock_irq(q->queue_lock);
- /* any requests still outstanding? */
- if (counts == 0)
- break;
-
- /* This may be the same req as the previous iteration,
- * always send the blk_end_request_all after a prefetch.
- * It is not okay to not end the request because the
- * prefetch started the request.
- */
- if (req) {
- /* return -ENXIO to indicate that this queue is
- * going away
- */
- req->errors = -ENXIO;
- blk_end_request_all(req, -ENXIO);
- }
-
- msleep(200); /* allow bsg to possibly finish */
- spin_lock_irq(q->queue_lock);
- }
- bsg_unregister_queue(q);
-}
-EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 673c977cc2bf..fb52df9744f5 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,8 +17,6 @@
#include "blk.h"
#include "blk-cgroup.h"
-static struct blkcg_policy blkcg_policy_cfq __maybe_unused;
-
/*
* tunables
*/
@@ -418,11 +416,6 @@ static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
return pd ? container_of(pd, struct cfq_group, pd) : NULL;
}
-static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
-{
- return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
-}
-
static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
{
return pd_to_blkg(&cfqg->pd);
@@ -572,6 +565,13 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static struct blkcg_policy blkcg_policy_cfq;
+
+static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
+{
+ return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
+}
+
static inline void cfqg_get(struct cfq_group *cfqg)
{
return blkg_get(cfqg_to_blkg(cfqg));
@@ -3951,10 +3951,11 @@ static void cfq_exit_queue(struct elevator_queue *e)
cfq_shutdown_timer_wq(cfqd);
-#ifndef CONFIG_CFQ_GROUP_IOSCHED
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ blkcg_deactivate_policy(q, &blkcg_policy_cfq);
+#else
kfree(cfqd->root_group);
#endif
- blkcg_deactivate_policy(q, &blkcg_policy_cfq);
kfree(cfqd);
}
@@ -4194,14 +4195,15 @@ static int __init cfq_init(void)
#ifdef CONFIG_CFQ_GROUP_IOSCHED
if (!cfq_group_idle)
cfq_group_idle = 1;
-#else
- cfq_group_idle = 0;
-#endif
ret = blkcg_policy_register(&blkcg_policy_cfq);
if (ret)
return ret;
+#else
+ cfq_group_idle = 0;
+#endif
+ ret = -ENOMEM;
cfq_pool = KMEM_CACHE(cfq_queue, 0);
if (!cfq_pool)
goto err_pol_unreg;
@@ -4215,13 +4217,17 @@ static int __init cfq_init(void)
err_free_pool:
kmem_cache_destroy(cfq_pool);
err_pol_unreg:
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
return ret;
}
static void __exit cfq_exit(void)
{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
elv_unregister(&iosched_cfq);
kmem_cache_destroy(cfq_pool);
}
diff --git a/block/genhd.c b/block/genhd.c
index 9cf5583c90ff..cac7366957c3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
part = rcu_dereference(ptbl->part[piter->idx]);
if (!part)
continue;
- if (!part->nr_sects &&
+ if (!part_nr_sects_read(part) &&
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
piter->idx == 0))
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
return part->start_sect <= sector &&
- sector < part->start_sect + part->nr_sects;
+ sector < part->start_sect + part_nr_sects_read(part);
}
/**
@@ -769,8 +769,8 @@ void __init printk_all_partitions(void)
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
bdevt_str(part_devt(part), devt_buf),
- (unsigned long long)part->nr_sects >> 1,
- disk_name(disk, part->partno, name_buf),
+ (unsigned long long)part_nr_sects_read(part) >> 1
+ , disk_name(disk, part->partno, name_buf),
uuid_buf);
if (is_part0) {
if (disk->driverfs_dev != NULL &&
@@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
while ((part = disk_part_iter_next(&piter)))
seq_printf(seqf, "%4d %7d %10llu %s\n",
MAJOR(part_devt(part)), MINOR(part_devt(part)),
- (unsigned long long)part->nr_sects >> 1,
+ (unsigned long long)part_nr_sects_read(part) >> 1,
disk_name(sgp, part->partno, buf));
disk_part_iter_exit(&piter);
@@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;
+ /*
+ * set_capacity() and get_capacity() currently don't use
+ * seqcounter to read/update the part0->nr_sects. Still init
+ * the counter as we can read the sectors in IO submission
+ * patch using seqence counters.
+ *
+ * TODO: Ideally set_capacity() and get_capacity() should be
+ * converted to make use of bd_mutex and sequence counters.
+ */
+ seqcount_init(&disk->part0.nr_sects_seq);
hd_ref_init(&disk->part0);
disk->minors = minors;
diff --git a/block/ioctl.c b/block/ioctl.c
index ba15b2dbfb98..4476e0e85d16 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
struct block_device *bdevp;
struct gendisk *disk;
- struct hd_struct *part;
+ struct hd_struct *part, *lpart;
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
struct disk_part_iter piter;
@@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
case BLKPG_ADD_PARTITION:
start = p.start >> 9;
length = p.length >> 9;
- /* check for fit in a hd_struct */
- if (sizeof(sector_t) == sizeof(long) &&
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
sizeof(long long) > sizeof(long)) {
long pstart = start, plength = length;
if (pstart != start || plength != length
@@ -92,6 +92,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
bdput(bdevp);
return 0;
+ case BLKPG_RESIZE_PARTITION:
+ start = p.start >> 9;
+ /* new length of partition in bytes */
+ length = p.length >> 9;
+ /* check for fit in a hd_struct */
+ if (sizeof(sector_t) == sizeof(long) &&
+ sizeof(long long) > sizeof(long)) {
+ long pstart = start, plength = length;
+ if (pstart != start || plength != length
+ || pstart < 0 || plength < 0)
+ return -EINVAL;
+ }
+ part = disk_get_part(disk, partno);
+ if (!part)
+ return -ENXIO;
+ bdevp = bdget(part_devt(part));
+ if (!bdevp) {
+ disk_put_part(part);
+ return -ENOMEM;
+ }
+ mutex_lock(&bdevp->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ if (start != part->start_sect) {
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return -EINVAL;
+ }
+ /* overlap? */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY);
+ while ((lpart = disk_part_iter_next(&piter))) {
+ if (lpart->partno != partno &&
+ !(start + length <= lpart->start_sect ||
+ start >= lpart->start_sect + lpart->nr_sects)
+ ) {
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return -EBUSY;
+ }
+ }
+ disk_part_iter_exit(&piter);
+ part_nr_sects_write(part, (sector_t)length);
+ i_size_write(bdevp->bd_inode, p.length);
+ mutex_unlock(&bdevp->bd_mutex);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdevp);
+ disk_put_part(part);
+ return 0;
default:
return -EINVAL;
}
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 6df5d6928a44..f1d14519cc04 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
+ return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}
static ssize_t part_ro_show(struct device *dev,
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
err = -ENOMEM;
goto out_free;
}
+
+ seqcount_init(&p->nr_sects_seq);
pdev = part_to_dev(p);
p->start_sect = start;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 260fa80ef575..9a87daa6f4fb 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
break;
}
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
/* In particular, rule out all resets and host-specific ioctls. */
printk_ratelimited(KERN_WARNING
"%s: sending ioctl %x to a partition!\n", current->comm, cmd);
- return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+ return -ENOIOCTLCMD;
}
EXPORT_SYMBOL(scsi_verify_blk_ioctl);