summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig6
-rw-r--r--block/Makefile3
-rw-r--r--block/bfq-iosched.c1
-rw-r--r--block/bfq-wf2q.c5
-rw-r--r--block/bio.c49
-rw-r--r--block/blk-merge.c23
-rw-r--r--block/blk-mq.c154
-rw-r--r--block/blk-settings.c2
-rw-r--r--block/blk-zoned.c2
-rw-r--r--block/partition-generic.c26
-rw-r--r--block/partitions/ldm.c2
-rw-r--r--block/t10-pi.c3
12 files changed, 205 insertions, 71 deletions
diff --git a/block/Kconfig b/block/Kconfig
index c23094a14a2b..3bc76bb113a0 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -66,7 +66,6 @@ config BLK_DEV_BSGLIB
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
- select CRC_T10DIF if BLK_DEV_INTEGRITY
---help---
Some storage devices allow extra information to be
stored/retrieved to help protect the data. The block layer
@@ -77,6 +76,11 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
+config BLK_DEV_INTEGRITY_T10
+ tristate
+ depends on BLK_DEV_INTEGRITY
+ select CRC_T10DIF
+
config BLK_DEV_ZONED
bool "Zoned block device support"
select MQ_IOSCHED_DEADLINE
diff --git a/block/Makefile b/block/Makefile
index 1f70c73ea83d..1a43750f4b01 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -26,7 +26,8 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ad4af4aaf2ce..4686b68b48b4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -427,7 +427,6 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd)
}
#define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
-#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define bfq_sample_valid(samples) ((samples) > 80)
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 05f0bf4a1144..ffe9ce9faa89 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -277,10 +277,7 @@ struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
*/
static u64 bfq_delta(unsigned long service, unsigned long weight)
{
- u64 d = (u64)service << WFQ_SERVICE_SHIFT;
-
- do_div(d, weight);
- return d;
+ return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight);
}
/**
diff --git a/block/bio.c b/block/bio.c
index a5d75f6bf4c7..94d697217887 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -539,6 +539,55 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
EXPORT_SYMBOL(zero_fill_bio_iter);
/**
+ * bio_truncate - truncate the bio to small size of @new_size
+ * @bio: the bio to be truncated
+ * @new_size: new size for truncating the bio
+ *
+ * Description:
+ * Truncate the bio to new size of @new_size. If bio_op(bio) is
+ * REQ_OP_READ, zero the truncated part. This function should only
+ * be used for handling corner cases, such as bio eod.
+ */
+void bio_truncate(struct bio *bio, unsigned new_size)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned int done = 0;
+ bool truncated = false;
+
+ if (new_size >= bio->bi_iter.bi_size)
+ return;
+
+ if (bio_op(bio) != REQ_OP_READ)
+ goto exit;
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (done + bv.bv_len > new_size) {
+ unsigned offset;
+
+ if (!truncated)
+ offset = new_size - done;
+ else
+ offset = 0;
+ zero_user(bv.bv_page, offset, bv.bv_len - offset);
+ truncated = true;
+ }
+ done += bv.bv_len;
+ }
+
+ exit:
+ /*
+ * Don't touch bvec table here and make it really immutable, since
+ * fs bio user has to retrieve all pages via bio_for_each_segment_all
+ * in its .end_bio() callback.
+ *
+ * It is enough to truncate bio by updating .bi_size since we can make
+ * correct bvec with the updated .bi_size for drivers.
+ */
+ bio->bi_iter.bi_size = new_size;
+}
+
+/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
*
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d783bdc4559b..1534ed736363 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -157,17 +157,20 @@ static inline unsigned get_max_io_size(struct request_queue *q,
return sectors & (lbs - 1);
}
-static unsigned get_max_segment_size(const struct request_queue *q,
- unsigned offset)
+static inline unsigned get_max_segment_size(const struct request_queue *q,
+ struct page *start_page,
+ unsigned long offset)
{
unsigned long mask = queue_segment_boundary(q);
- /* default segment boundary mask means no boundary limit */
- if (mask == BLK_SEG_BOUNDARY_MASK)
- return queue_max_segment_size(q);
+ offset = mask & (page_to_phys(start_page) + offset);
- return min_t(unsigned long, mask - (mask & offset) + 1,
- queue_max_segment_size(q));
+ /*
+ * overflow may be triggered in case of zero page physical address
+ * on 32bit arch, use queue's max segment size when that happens.
+ */
+ return min_not_zero(mask - offset + 1,
+ (unsigned long)queue_max_segment_size(q));
}
/**
@@ -201,7 +204,8 @@ static bool bvec_split_segs(const struct request_queue *q,
unsigned seg_size = 0;
while (len && *nsegs < max_segs) {
- seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
+ seg_size = get_max_segment_size(q, bv->bv_page,
+ bv->bv_offset + total_len);
seg_size = min(seg_size, len);
(*nsegs)++;
@@ -419,7 +423,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
while (nbytes > 0) {
unsigned offset = bvec->bv_offset + total;
- unsigned len = min(get_max_segment_size(q, offset), nbytes);
+ unsigned len = min(get_max_segment_size(q, bvec->bv_page,
+ offset), nbytes);
struct page *page = bvec->bv_page;
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 323c9cb28066..a12b1763508d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -641,6 +641,14 @@ bool blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
+/**
+ * blk_mq_start_request - Start processing a request
+ * @rq: Pointer to request to be started
+ *
+ * Function used by device drivers to notify the block layer that a request
+ * is going to be processed now, so blk layer can do proper initializations
+ * such as starting the timeout timer.
+ */
void blk_mq_start_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -1327,6 +1335,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
return (queued + errors) != 0;
}
+/**
+ * __blk_mq_run_hw_queue - Run a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ *
+ * Send pending requests to the hardware.
+ */
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
int srcu_idx;
@@ -1424,6 +1438,15 @@ select_cpu:
return next_cpu;
}
+/**
+ * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ * @async: If we want to run the queue asynchronously.
+ * @msecs: Microseconds of delay to wait before running the queue.
+ *
+ * If !@async, try to run the queue now. Else, run the queue asynchronously and
+ * with a delay of @msecs.
+ */
static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
unsigned long msecs)
{
@@ -1445,12 +1468,28 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
msecs_to_jiffies(msecs));
}
+/**
+ * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
+ * @hctx: Pointer to the hardware queue to run.
+ * @msecs: Microseconds of delay to wait before running the queue.
+ *
+ * Run a hardware queue asynchronously with a delay of @msecs.
+ */
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{
__blk_mq_delay_run_hw_queue(hctx, true, msecs);
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
+/**
+ * blk_mq_run_hw_queue - Start to run a hardware queue.
+ * @hctx: Pointer to the hardware queue to run.
+ * @async: If we want to run the queue asynchronously.
+ *
+ * Check if the request queue is not in a quiesced state and if there are
+ * pending requests to be sent. If this is true, run the queue to send requests
+ * to hardware.
+ */
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
int srcu_idx;
@@ -1474,6 +1513,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
+/**
+ * blk_mq_run_hw_queue - Run all hardware queues in a request queue.
+ * @q: Pointer to the request queue to run.
+ * @async: If we want to run the queue asynchronously.
+ */
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
{
struct blk_mq_hw_ctx *hctx;
@@ -1625,7 +1669,11 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_mq_hctx_mark_pending(hctx, ctx);
}
-/*
+/**
+ * blk_mq_request_bypass_insert - Insert a request at dispatch list.
+ * @rq: Pointer to request to be inserted.
+ * @run_queue: If we should run the hardware queue after inserting the request.
+ *
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
@@ -1668,28 +1716,20 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
struct request *rqa = container_of(a, struct request, queuelist);
struct request *rqb = container_of(b, struct request, queuelist);
- if (rqa->mq_ctx < rqb->mq_ctx)
- return -1;
- else if (rqa->mq_ctx > rqb->mq_ctx)
- return 1;
- else if (rqa->mq_hctx < rqb->mq_hctx)
- return -1;
- else if (rqa->mq_hctx > rqb->mq_hctx)
- return 1;
+ if (rqa->mq_ctx != rqb->mq_ctx)
+ return rqa->mq_ctx > rqb->mq_ctx;
+ if (rqa->mq_hctx != rqb->mq_hctx)
+ return rqa->mq_hctx > rqb->mq_hctx;
return blk_rq_pos(rqa) > blk_rq_pos(rqb);
}
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
- struct blk_mq_hw_ctx *this_hctx;
- struct blk_mq_ctx *this_ctx;
- struct request_queue *this_q;
- struct request *rq;
LIST_HEAD(list);
- LIST_HEAD(rq_list);
- unsigned int depth;
+ if (list_empty(&plug->mq_list))
+ return;
list_splice_init(&plug->mq_list, &list);
if (plug->rq_count > 2 && plug->multiple_queues)
@@ -1697,42 +1737,27 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
plug->rq_count = 0;
- this_q = NULL;
- this_hctx = NULL;
- this_ctx = NULL;
- depth = 0;
-
- while (!list_empty(&list)) {
- rq = list_entry_rq(list.next);
- list_del_init(&rq->queuelist);
- BUG_ON(!rq->q);
- if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) {
- if (this_hctx) {
- trace_block_unplug(this_q, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_hctx, this_ctx,
- &rq_list,
- from_schedule);
- }
-
- this_q = rq->q;
- this_ctx = rq->mq_ctx;
- this_hctx = rq->mq_hctx;
- depth = 0;
+ do {
+ struct list_head rq_list;
+ struct request *rq, *head_rq = list_entry_rq(list.next);
+ struct list_head *pos = &head_rq->queuelist; /* skip first */
+ struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
+ struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
+ unsigned int depth = 1;
+
+ list_for_each_continue(pos, &list) {
+ rq = list_entry_rq(pos);
+ BUG_ON(!rq->q);
+ if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
+ break;
+ depth++;
}
- depth++;
- list_add_tail(&rq->queuelist, &rq_list);
- }
-
- /*
- * If 'this_hctx' is set, we know we have entries to complete
- * on 'rq_list'. Do those.
- */
- if (this_hctx) {
- trace_block_unplug(this_q, depth, !from_schedule);
+ list_cut_before(&rq_list, &list, pos);
+ trace_block_unplug(head_rq->q, depth, !from_schedule);
blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
from_schedule);
- }
+ } while(!list_empty(&list));
}
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -1828,6 +1853,17 @@ insert:
return BLK_STS_OK;
}
+/**
+ * blk_mq_try_issue_directly - Try to send a request directly to device driver.
+ * @hctx: Pointer of the associated hardware queue.
+ * @rq: Pointer to request to be sent.
+ * @cookie: Request queue cookie.
+ *
+ * If the device has enough resources to accept a new request now, send the
+ * request directly to device driver. Else, insert at hctx->dispatch queue, so
+ * we can try send it another time in the future. Requests inserted at this
+ * queue have higher priority.
+ */
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq, blk_qc_t *cookie)
{
@@ -1905,6 +1941,22 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
}
}
+/**
+ * blk_mq_make_request - Create and send a request to block device.
+ * @q: Request queue pointer.
+ * @bio: Bio pointer.
+ *
+ * Builds up a request structure from @q and @bio and send to the device. The
+ * request may not be queued directly to hardware if:
+ * * This request can be merged with another one
+ * * We want to place request at plug queue for possible future merging
+ * * There is an IO scheduler active at this queue
+ *
+ * It will not queue the request if there is an error with the bio, or at the
+ * request creation.
+ *
+ * Returns: Request queue cookie.
+ */
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = op_is_sync(bio->bi_opf);
@@ -1950,7 +2002,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
plug = blk_mq_plug(q, bio);
if (unlikely(is_flush_fua)) {
- /* bypass scheduler for flush rq */
+ /* Bypass scheduler for flush requests */
blk_insert_flush(rq);
blk_mq_run_hw_queue(data.hctx, true);
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
@@ -1978,6 +2030,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_add_rq_to_plug(plug, rq);
} else if (q->elevator) {
+ /* Insert the request at the IO scheduler queue */
blk_mq_sched_insert_request(rq, false, true, true);
} else if (plug && !blk_queue_nomerges(q)) {
/*
@@ -2004,8 +2057,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
}
} else if ((q->nr_hw_queues > 1 && is_sync) ||
!data.hctx->dispatch_busy) {
+ /*
+ * There is no scheduler and we can try to send directly
+ * to the hardware.
+ */
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else {
+ /* Default case. */
blk_mq_sched_insert_request(rq, false, true, true);
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5f6dcc7a47bd..c8eda2e7b91e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -328,7 +328,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size);
* storage device can address. The default of 512 covers most
* hardware.
**/
-void blk_queue_logical_block_size(struct request_queue *q, unsigned short size)
+void blk_queue_logical_block_size(struct request_queue *q, unsigned int size)
{
q->limits.logical_block_size = size;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index d00fcfd71dfe..05741c6f618b 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -198,7 +198,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
break;
}
- bio->bi_opf = op;
+ bio->bi_opf = op | REQ_SYNC;
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 1d20c9cf213f..564fae77711d 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -321,6 +321,24 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
const char *dname;
int err;
+ /*
+ * Partitions are not supported on zoned block devices that are used as
+ * such.
+ */
+ switch (disk->queue->limits.zoned) {
+ case BLK_ZONED_HM:
+ pr_warn("%s: partitions not supported on host managed zoned block device\n",
+ disk->disk_name);
+ return ERR_PTR(-ENXIO);
+ case BLK_ZONED_HA:
+ pr_info("%s: disabling host aware zoned block device support due to partitions\n",
+ disk->disk_name);
+ disk->queue->limits.zoned = BLK_ZONED_NONE;
+ break;
+ case BLK_ZONED_NONE:
+ break;
+ }
+
err = disk_expand_part_tbl(disk, partno);
if (err)
return ERR_PTR(err);
@@ -501,7 +519,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
part = add_partition(disk, p, from, size, state->parts[p].flags,
&state->parts[p].info);
- if (IS_ERR(part)) {
+ if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
disk->disk_name, p, -PTR_ERR(part));
return true;
@@ -540,10 +558,10 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
}
/*
- * Partitions are not supported on zoned block devices.
+ * Partitions are not supported on host managed zoned block devices.
*/
- if (bdev_is_zoned(bdev)) {
- pr_warn("%s: ignoring partition table on zoned block device\n",
+ if (disk->queue->limits.zoned == BLK_ZONED_HM) {
+ pr_warn("%s: ignoring partition table on host managed zoned block device\n",
disk->disk_name);
ret = 0;
goto out_free_state;
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index fe5d970e2e60..a2d97ee1908c 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -1233,7 +1233,7 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
BUG_ON (!data || !frags);
if (size < 2 * VBLK_SIZE_HEAD) {
- ldm_error("Value of size is to small.");
+ ldm_error("Value of size is too small.");
return false;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index f4907d941f03..d910534b3a41 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -7,6 +7,7 @@
#include <linux/t10-pi.h>
#include <linux/blkdev.h>
#include <linux/crc-t10dif.h>
+#include <linux/module.h>
#include <net/checksum.h>
typedef __be16 (csum_fn) (void *, unsigned int);
@@ -280,3 +281,5 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
.complete_fn = t10_pi_type3_complete,
};
EXPORT_SYMBOL(t10_pi_type3_ip);
+
+MODULE_LICENSE("GPL");