summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-core.c127
-rw-r--r--block/blk-exec.c4
-rw-r--r--block/blk-lib.c26
-rw-r--r--block/blk-settings.c6
-rw-r--r--block/blk-sysfs.c4
-rw-r--r--block/blk-throttle.c2
-rw-r--r--block/blk.h4
-rw-r--r--block/bsg-lib.c13
-rw-r--r--block/cfq-iosched.c3
-rw-r--r--block/deadline-iosched.c2
-rw-r--r--block/elevator.c16
-rw-r--r--block/genhd.c2
-rw-r--r--block/partitions/Kconfig4
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--include/linux/backing-dev.h4
-rw-r--r--include/linux/blkdev.h17
-rw-r--r--include/linux/bsg-lib.h1
-rw-r--r--mm/backing-dev.c84
19 files changed, 224 insertions, 99 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 3f6d39d23bb6..b8858fb0cafa 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
- return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+ return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
return __blkg_lookup_create(blkcg, q, NULL);
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c95c4d6e31a..c973249d68cd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -40,6 +40,7 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
DEFINE_IDA(blk_queue_ida);
@@ -219,12 +220,13 @@ static void blk_delay_work(struct work_struct *work)
* Description:
* Sometimes queueing needs to be postponed for a little while, to allow
* resources to come back. This function will make sure that queueing is
- * restarted around the specified time.
+ * restarted around the specified time. Queue lock must be held.
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
- queue_delayed_work(kblockd_workqueue, &q->delay_work,
- msecs_to_jiffies(msecs));
+ if (likely(!blk_queue_dead(q)))
+ queue_delayed_work(kblockd_workqueue, &q->delay_work,
+ msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);
@@ -293,6 +295,34 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);
/**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q: The queue to run
+ *
+ * Description:
+ * Invoke request handling on a queue if there are any pending requests.
+ * May be used to restart request handling after a request has completed.
+ * This variant runs the queue whether or not the queue has been
+ * stopped. Must be called with the queue lock held and interrupts
+ * disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+ if (unlikely(blk_queue_dead(q)))
+ return;
+
+ /*
+ * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+ * the queue lock internally. As a result multiple threads may be
+ * running such a request function concurrently. Keep track of the
+ * number of active request_fn invocations such that blk_drain_queue()
+ * can wait until all these request_fn calls have finished.
+ */
+ q->request_fn_active++;
+ q->request_fn(q);
+ q->request_fn_active--;
+}
+
+/**
* __blk_run_queue - run a single device queue
* @q: The queue to run
*
@@ -305,7 +335,7 @@ void __blk_run_queue(struct request_queue *q)
if (unlikely(blk_queue_stopped(q)))
return;
- q->request_fn(q);
+ __blk_run_queue_uncond(q);
}
EXPORT_SYMBOL(__blk_run_queue);
@@ -315,11 +345,11 @@ EXPORT_SYMBOL(__blk_run_queue);
*
* Description:
* Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- * of us.
+ * of us. The caller must hold the queue lock.
*/
void blk_run_queue_async(struct request_queue *q)
{
- if (likely(!blk_queue_stopped(q)))
+ if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
EXPORT_SYMBOL(blk_run_queue_async);
@@ -349,7 +379,7 @@ void blk_put_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_put_queue);
/**
- * blk_drain_queue - drain requests from request_queue
+ * __blk_drain_queue - drain requests from request_queue
* @q: queue to drain
* @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
*
@@ -357,15 +387,17 @@ EXPORT_SYMBOL(blk_put_queue);
* If not, only ELVPRIV requests are drained. The caller is responsible
* for ensuring that no new requests which need to be drained are queued.
*/
-void blk_drain_queue(struct request_queue *q, bool drain_all)
+static void __blk_drain_queue(struct request_queue *q, bool drain_all)
+ __releases(q->queue_lock)
+ __acquires(q->queue_lock)
{
int i;
+ lockdep_assert_held(q->queue_lock);
+
while (true) {
bool drain = false;
- spin_lock_irq(q->queue_lock);
-
/*
* The caller might be trying to drain @q before its
* elevator is initialized.
@@ -386,6 +418,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
__blk_run_queue(q);
drain |= q->nr_rqs_elvpriv;
+ drain |= q->request_fn_active;
/*
* Unfortunately, requests are queued at and tracked from
@@ -401,11 +434,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
}
}
- spin_unlock_irq(q->queue_lock);
-
if (!drain)
break;
+
+ spin_unlock_irq(q->queue_lock);
+
msleep(10);
+
+ spin_lock_irq(q->queue_lock);
}
/*
@@ -416,13 +452,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
if (q->request_fn) {
struct request_list *rl;
- spin_lock_irq(q->queue_lock);
-
blk_queue_for_each_rl(rl, q)
for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
wake_up_all(&rl->wait[i]);
-
- spin_unlock_irq(q->queue_lock);
}
}
@@ -446,7 +478,10 @@ void blk_queue_bypass_start(struct request_queue *q)
spin_unlock_irq(q->queue_lock);
if (drain) {
- blk_drain_queue(q, false);
+ spin_lock_irq(q->queue_lock);
+ __blk_drain_queue(q, false);
+ spin_unlock_irq(q->queue_lock);
+
/* ensure blk_queue_bypass() is %true inside RCU read lock */
synchronize_rcu();
}
@@ -473,20 +508,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
*
- * Mark @q DEAD, drain all pending requests, destroy and put it. All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it. All future requests will be failed immediately with -ENODEV.
*/
void blk_cleanup_queue(struct request_queue *q)
{
spinlock_t *lock = q->queue_lock;
- /* mark @q DEAD, no new request or merges will be allowed afterwards */
+ /* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
- queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
spin_lock_irq(lock);
/*
- * Dead queue is permanently in bypass mode till released. Note
+ * A dying queue is permanently in bypass mode till released. Note
* that, unlike blk_queue_bypass_start(), we aren't performing
* synchronize_rcu() after entering bypass mode to avoid the delay
* as some drivers create and destroy a lot of queues while
@@ -499,12 +534,18 @@ void blk_cleanup_queue(struct request_queue *q)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- queue_flag_set(QUEUE_FLAG_DEAD, q);
+ queue_flag_set(QUEUE_FLAG_DYING, q);
spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
- /* drain all requests queued before DEAD marking */
- blk_drain_queue(q, true);
+ /*
+ * Drain all requests queued before DYING marking. Set DEAD flag to
+ * prevent that q->request_fn() gets invoked after draining finished.
+ */
+ spin_lock_irq(lock);
+ __blk_drain_queue(q, true);
+ queue_flag_set(QUEUE_FLAG_DEAD, q);
+ spin_unlock_irq(lock);
/* @q won't process any more request, flush async actions */
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
@@ -549,7 +590,7 @@ void blk_exit_rl(struct request_list *rl)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
- return blk_alloc_queue_node(gfp_mask, -1);
+ return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);
@@ -660,7 +701,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
- return blk_init_queue_node(rfn, lock, -1);
+ return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_init_queue);
@@ -716,7 +757,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue);
bool blk_get_queue(struct request_queue *q)
{
- if (likely(!blk_queue_dead(q))) {
+ if (likely(!blk_queue_dying(q))) {
__blk_get_queue(q);
return true;
}
@@ -870,7 +911,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
const bool is_sync = rw_is_sync(rw_flags) != 0;
int may_queue;
- if (unlikely(blk_queue_dead(q)))
+ if (unlikely(blk_queue_dying(q)))
return NULL;
may_queue = elv_may_queue(q, rw_flags);
@@ -1050,7 +1091,7 @@ retry:
if (rq)
return rq;
- if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+ if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return NULL;
}
@@ -1910,7 +1951,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
return -EIO;
spin_lock_irqsave(q->queue_lock, flags);
- if (unlikely(blk_queue_dead(q))) {
+ if (unlikely(blk_queue_dying(q))) {
spin_unlock_irqrestore(q->queue_lock, flags);
return -ENODEV;
}
@@ -2884,27 +2925,11 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
{
trace_block_unplug(q, depth, !from_schedule);
- /*
- * Don't mess with dead queue.
- */
- if (unlikely(blk_queue_dead(q))) {
- spin_unlock(q->queue_lock);
- return;
- }
-
- /*
- * If we are punting this to kblockd, then we can safely drop
- * the queue_lock before waking kblockd (which needs to take
- * this lock).
- */
- if (from_schedule) {
- spin_unlock(q->queue_lock);
+ if (from_schedule)
blk_run_queue_async(q);
- } else {
+ else
__blk_run_queue(q);
- spin_unlock(q->queue_lock);
- }
-
+ spin_unlock(q->queue_lock);
}
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -2996,7 +3021,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* Short-circuit if @q is dead
*/
- if (unlikely(blk_queue_dead(q))) {
+ if (unlikely(blk_queue_dying(q))) {
__blk_end_request_all(rq, -ENODEV);
continue;
}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index f71eac35c1b9..74638ec234c8 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
spin_lock_irq(q->queue_lock);
- if (unlikely(blk_queue_dead(q))) {
+ if (unlikely(blk_queue_dying(q))) {
rq->errors = -ENXIO;
if (rq->end_io)
rq->end_io(rq, rq->errors);
@@ -78,7 +78,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
__blk_run_queue(q);
/* the queue is stopped so it won't be run */
if (is_pm_resume)
- q->request_fn(q);
+ __blk_run_queue_uncond(q);
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9373b58dfab1..b3a1f2b70b31 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -43,11 +43,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
int type = REQ_WRITE | REQ_DISCARD;
- unsigned int max_discard_sectors;
- unsigned int granularity, alignment, mask;
+ sector_t max_discard_sectors;
+ sector_t granularity, alignment;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
+ struct blk_plug plug;
if (!q)
return -ENXIO;
@@ -57,15 +58,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);
- mask = granularity - 1;
- alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
+ alignment = bdev_discard_alignment(bdev) >> 9;
+ alignment = sector_div(alignment, granularity);
/*
* Ensure that max_discard_sectors is of the proper
* granularity, so that requests stay aligned after a split.
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- max_discard_sectors = round_down(max_discard_sectors, granularity);
+ sector_div(max_discard_sectors, granularity);
+ max_discard_sectors *= granularity;
if (unlikely(!max_discard_sectors)) {
/* Avoid infinite loop below. Being cautious never hurts. */
return -EOPNOTSUPP;
@@ -81,9 +83,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
+ blk_start_plug(&plug);
while (nr_sects) {
unsigned int req_sects;
- sector_t end_sect;
+ sector_t end_sect, tmp;
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
@@ -98,10 +101,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
* misaligned, stop the discard at the previous aligned sector.
*/
end_sect = sector + req_sects;
- if (req_sects < nr_sects && (end_sect & mask) != alignment) {
- end_sect =
- round_down(end_sect - alignment, granularity)
- + alignment;
+ tmp = end_sect;
+ if (req_sects < nr_sects &&
+ sector_div(tmp, granularity) != alignment) {
+ end_sect = end_sect - alignment;
+ sector_div(end_sect, granularity);
+ end_sect = end_sect * granularity + alignment;
req_sects = end_sect - sector;
}
@@ -117,6 +122,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
atomic_inc(&bb.done);
submit_bio(type, bio);
}
+ blk_finish_plug(&plug);
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 779bb7646bcd..c50ecf0ea3b1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
bottom = b->discard_granularity + alignment;
/* Verify that top and bottom intervals line up */
- if (max(top, bottom) & (min(top, bottom) - 1))
+ if ((max(top, bottom) % min(top, bottom)) != 0)
t->discard_misaligned = 1;
}
@@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_discard_sectors);
t->discard_granularity = max(t->discard_granularity,
b->discard_granularity);
- t->discard_alignment = lcm(t->discard_alignment, alignment) &
- (t->discard_granularity - 1);
+ t->discard_alignment = lcm(t->discard_alignment, alignment) %
+ t->discard_granularity;
}
return ret;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ce6204608822..788147797a79 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dead(q)) {
+ if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
@@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dead(q)) {
+ if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a9664fa0b609..31146225f3d0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
/* if %NULL and @q is alive, fall back to root_tg */
if (!IS_ERR(blkg))
tg = blkg_to_tg(blkg);
- else if (!blk_queue_dead(q))
+ else if (!blk_queue_dying(q))
tg = td_root_tg(td);
}
diff --git a/block/blk.h b/block/blk.h
index ca51543b248c..47fdfdd41520 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
q->flush_queue_delayed = 1;
return NULL;
}
- if (unlikely(blk_queue_dead(q)) ||
+ if (unlikely(blk_queue_dying(q)) ||
!q->elevator->type->ops.elevator_dispatch_fn(q, 0))
return NULL;
}
@@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio);
void blk_queue_congestion_threshold(struct request_queue *q);
+void __blk_run_queue_uncond(struct request_queue *q);
+
int blk_dev_init(void);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index deee61fbb741..650f427d915b 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -151,19 +151,6 @@ failjob_rls_job:
return -ENOMEM;
}
-/*
- * bsg_goose_queue - restart queue in case it was stopped
- * @q: request q to be restarted
- */
-void bsg_goose_queue(struct request_queue *q)
-{
- if (!q)
- return;
-
- blk_run_queue_async(q);
-}
-EXPORT_SYMBOL_GPL(bsg_goose_queue);
-
/**
* bsg_request_fn - generic handler for bsg requests
* @q: request queue to manage
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fb52df9744f5..e62e9205b80a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1973,7 +1973,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
+ time_before(rq_fifo_time(next), rq_fifo_time(rq)) &&
+ cfqq == RQ_CFQQ(next)) {
list_move(&rq->queuelist, &next->queuelist);
rq_set_fifo_time(rq, rq_fifo_time(next));
}
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 599b12e5380f..90037b5eb17f 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
/*
* rq is expired!
*/
- if (time_after(jiffies, rq_fifo_time(rq)))
+ if (time_after_eq(jiffies, rq_fifo_time(rq)))
return 1;
return 0;
diff --git a/block/elevator.c b/block/elevator.c
index 9b1d42b62f20..9edba1b8323e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -458,6 +458,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
struct request *rq)
{
struct request *__rq;
+ bool ret;
if (blk_queue_nomerges(q))
return false;
@@ -471,14 +472,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
if (blk_queue_noxmerges(q))
return false;
+ ret = false;
/*
* See if our hash lookup can find a potential backmerge.
*/
- __rq = elv_rqhash_find(q, blk_rq_pos(rq));
- if (__rq && blk_attempt_req_merge(q, __rq, rq))
- return true;
+ while (1) {
+ __rq = elv_rqhash_find(q, blk_rq_pos(rq));
+ if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
+ break;
- return false;
+ /* The merged request could be merged with others, try again */
+ ret = true;
+ rq = __rq;
+ }
+
+ return ret;
}
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
diff --git a/block/genhd.c b/block/genhd.c
index 6cace663a80e..2a6fdf539a69 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1245,7 +1245,7 @@ EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *alloc_disk(int minors)
{
- return alloc_disk_node(minors, -1);
+ return alloc_disk_node(minors, NUMA_NO_NODE);
}
EXPORT_SYMBOL(alloc_disk);
diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig
index cb5f0a3f1b03..75a54e1adbb5 100644
--- a/block/partitions/Kconfig
+++ b/block/partitions/Kconfig
@@ -234,8 +234,8 @@ config KARMA_PARTITION
uses a proprietary partition table.
config EFI_PARTITION
- bool "EFI GUID Partition support"
- depends on PARTITION_ADVANCED
+ bool "EFI GUID Partition support" if PARTITION_ADVANCED
+ default y
select CRC32
help
Say Y here if you would like to use hard disks under Linux which
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9032e910bca3..f1bf5aff68ed 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1418,7 +1418,7 @@ static int scsi_lld_busy(struct request_queue *q)
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost;
- if (blk_queue_dead(q))
+ if (blk_queue_dying(q))
return 0;
shost = sdev->host;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..238521a19849 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/atomic.h>
#include <linux/sysctl.h>
+#include <linux/mutex.h>
struct page;
struct device;
@@ -105,6 +106,9 @@ struct backing_dev_info {
struct timer_list laptop_mode_wb_timer;
+ cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
+ struct mutex flusher_cpumask_lock;
+
#ifdef CONFIG_DEBUG_FS
struct dentry *debug_dir;
struct dentry *debug_stats;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001210d2..acb4f7bbbd32 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,12 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
+ /*
+ * Number of active block driver functions for which blk_drain_queue()
+ * must wait. Must be incremented around functions that unlock the
+ * queue_lock internally, e.g. scsi_request_fn().
+ */
+ unsigned int request_fn_active;
unsigned int rq_timeout;
struct timer_list timeout;
@@ -437,7 +443,7 @@ struct request_queue {
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
-#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
+#define QUEUE_FLAG_DYING 5 /* queue being torn down */
#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
@@ -452,6 +458,7 @@ struct request_queue {
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
+#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -521,6 +528,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
@@ -1180,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q)
static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
{
- unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+ sector_t alignment = sector << 9;
+ alignment = sector_div(alignment, lim->discard_granularity);
if (!lim->max_discard_sectors)
return 0;
- return (lim->discard_granularity + lim->discard_alignment - alignment)
- & (lim->discard_granularity - 1);
+ alignment = lim->discard_granularity + lim->discard_alignment - alignment;
+ return sector_div(alignment, lim->discard_granularity);
}
static inline int bdev_discard_alignment(struct block_device *bdev)
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 4d0fb3df2f4a..a226652a5a6c 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -67,6 +67,5 @@ void bsg_job_done(struct bsg_job *job, int result,
int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
bsg_job_fn *job_fn, int dd_job_size);
void bsg_request_fn(struct request_queue *q);
-void bsg_goose_queue(struct request_queue *q);
#endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
+#include <linux/slab.h>
#include <trace/events/writeback.h>
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
}
BDI_SHOW(max_ratio, bdi->max_ratio)
+static ssize_t cpu_list_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ struct bdi_writeback *wb = &bdi->wb;
+ cpumask_var_t newmask;
+ ssize_t ret;
+ struct task_struct *task;
+
+ if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpulist_parse(buf, newmask);
+ if (!ret) {
+ spin_lock_bh(&bdi->wb_lock);
+ task = wb->task;
+ if (task)
+ get_task_struct(task);
+ spin_unlock_bh(&bdi->wb_lock);
+
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ if (task) {
+ ret = set_cpus_allowed_ptr(task, newmask);
+ put_task_struct(task);
+ }
+ if (ret == 0) {
+ cpumask_copy(bdi->flusher_cpumask, newmask);
+ ret = count;
+ }
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+
+ }
+ free_cpumask_var(newmask);
+
+ return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+
+ return ret;
+}
+
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
static struct device_attribute bdi_dev_attrs[] = {
__ATTR_RW(read_ahead_kb),
__ATTR_RW(min_ratio),
__ATTR_RW(max_ratio),
+ __ATTR_RW(cpu_list),
__ATTR_NULL,
};
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
writeback_inodes_wb(&bdi->wb, 1024,
WB_REASON_FORKER_THREAD);
} else {
+ int ret;
/*
* The spinlock makes sure we do not lose
* wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
spin_lock_bh(&bdi->wb_lock);
bdi->wb.task = task;
spin_unlock_bh(&bdi->wb_lock);
+ mutex_lock(&bdi->flusher_cpumask_lock);
+ ret = set_cpus_allowed_ptr(task,
+ bdi->flusher_cpumask);
+ mutex_unlock(&bdi->flusher_cpumask_lock);
+ if (ret)
+ printk_once("%s: failed to bind flusher"
+ " thread %s, error %d\n",
+ __func__, task->comm, ret);
wake_up_process(task);
}
bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
dev_name(dev));
if (IS_ERR(wb->task))
return PTR_ERR(wb->task);
+ } else {
+ int node;
+ /*
+ * Set up a default cpumask for the flusher threads that
+ * includes all cpus on the same numa node as the device.
+ * The mask may be overridden via sysfs.
+ */
+ node = dev_to_node(bdi->dev);
+ if (node != NUMA_NO_NODE)
+ cpumask_copy(bdi->flusher_cpumask,
+ cpumask_of_node(node));
}
bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
bdi_wb_init(&bdi->wb, bdi);
+ if (!bdi_cap_flush_forker(bdi)) {
+ bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+ if (!bdi->flusher_cpumask)
+ return -ENOMEM;
+ cpumask_setall(bdi->flusher_cpumask);
+ mutex_init(&bdi->flusher_cpumask_lock);
+ } else
+ bdi->flusher_cpumask = NULL;
+
for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
err = percpu_counter_init(&bdi->bdi_stat[i], 0);
if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
err:
while (i--)
percpu_counter_destroy(&bdi->bdi_stat[i]);
+ kfree(bdi->flusher_cpumask);
}
return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
bdi_unregister(bdi);
+ kfree(bdi->flusher_cpumask);
+
/*
* If bdi_unregister() had already been called earlier, the
* wakeup_timer could still be armed because bdi_prune_sb()