summaryrefslogtreecommitdiff
path: root/block/blk-lib.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 20:39:57 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 20:39:57 +0300
commit694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /block/blk-lib.c
parenta351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
downloadlinux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.xz
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ was initially a fork of CFQ, but subsequently changed to implement fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant to be used on desktop type single drives, providing good fairness. From Paolo. - Add Kyber IO scheduler. This is a full multiqueue aware scheduler, using a scalable token based algorithm that throttles IO based on live completion IO stats, similarly to blk-wbt. From Omar. - A series from Jan, moving users to separately allocated backing devices. This continues the work of separating backing device life times, solving various problems with hot removal. - A series of updates for lightnvm, mostly from Javier. Includes a 'pblk' target that exposes an open channel SSD as a physical block device. - A series of fixes and improvements for nbd from Josef. - A series from Omar, removing queue sharing between devices on mostly legacy drivers. This helps us clean up other bits, if we know that a queue only has a single device backing. This has been overdue for more than a decade. - Fixes for the blk-stats, and improvements to unify the stats and user windows. This both improves blk-wbt, and enables other users to register a need to receive IO stats for a device. From Omar. - blk-throttle improvements from Shaohua. This provides a scalable framework for implementing scalable prioritization - particularly for blk-mq, but applicable to any type of block device. The interface is marked experimental for now. - Bucketized IO stats for IO polling from Stephen Bates. This improves efficiency of polled workloads in the presence of mixed block size IO. - A few fixes for opal, from Scott. - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics. From a variety of folks, mostly Sagi and James Smart. - A series from Bart, improving our exposed info and capabilities from the blk-mq debugfs support. 
- A series from Christoph, cleaning up how we handle WRITE_ZEROES. - A series from Christoph, cleaning up the block layer handling of how we track errors in a request. On top of being a nice cleanup, it also shrinks the size of struct request a bit. - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was never used by platforms, and the latter has outlived its usefulness. - Various little bug fixes and cleanups from a wide variety of folks. * 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits) block: hide badblocks attribute by default blk-mq: unify hctx delay_work and run_work block: add kblock_mod_delayed_work_on() blk-mq: unify hctx delayed_run_work and run_work nbd: fix use after free on module unload MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler blk-mq-sched: alloate reserved tags out of normal pool mtip32xx: use runtime tag to initialize command header scsi: Implement blk_mq_ops.show_rq() blk-mq: Add blk_mq_ops.show_rq() blk-mq: Show operation, cmd_flags and rq_flags names blk-mq: Make blk_flags_show() callers append a newline character blk-mq: Move the "state" debugfs attribute one level down blk-mq: Unregister debugfs attributes earlier blk-mq: Only unregister hctxs for which registration succeeded blk-mq-debugfs: Rename functions for registering and unregistering the mq directory blk-mq: Let blk_mq_debugfs_register() look up the queue name blk-mq: Register <dev>/queue/mq after having registered <dev>/queue ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset ide-pm: always pass 0 error to __blk_end_request_all ..
Diffstat (limited to 'block/blk-lib.c')
-rw-r--r--block/blk-lib.c78
1 files changed, 30 insertions, 48 deletions
diff --git a/block/blk-lib.c b/block/blk-lib.c
index ed1e78e24db0..e8caecd71688 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -37,17 +37,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
return -ENXIO;
if (flags & BLKDEV_DISCARD_SECURE) {
- if (flags & BLKDEV_DISCARD_ZERO)
- return -EOPNOTSUPP;
if (!blk_queue_secure_erase(q))
return -EOPNOTSUPP;
op = REQ_OP_SECURE_ERASE;
} else {
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
- if ((flags & BLKDEV_DISCARD_ZERO) &&
- !q->limits.discard_zeroes_data)
- return -EOPNOTSUPP;
op = REQ_OP_DISCARD;
}
@@ -109,7 +104,7 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
* @sector: start sector
* @nr_sects: number of sectors to discard
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @flags: BLKDEV_IFL_* flags to control behaviour
+ * @flags: BLKDEV_DISCARD_* flags to control behaviour
*
* Description:
* Issue a discard request for the sectors in question.
@@ -126,7 +121,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
&bio);
if (!ret && bio) {
ret = submit_bio_wait(bio);
- if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
+ if (ret == -EOPNOTSUPP)
ret = 0;
bio_put(bio);
}
@@ -226,20 +221,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_write_same);
-/**
- * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
- * @bdev: blockdev to issue
- * @sector: start sector
- * @nr_sects: number of sectors to write
- * @gfp_mask: memory allocation flags (for bio_alloc)
- * @biop: pointer to anchor bio
- *
- * Description:
- * Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
- */
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop)
+ struct bio **biop, unsigned flags)
{
struct bio *bio = *biop;
unsigned int max_write_zeroes_sectors;
@@ -258,7 +242,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+ bio->bi_opf = REQ_OP_WRITE_ZEROES;
+ if (flags & BLKDEV_ZERO_NOUNMAP)
+ bio->bi_opf |= REQ_NOUNMAP;
if (nr_sects > max_write_zeroes_sectors) {
bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
@@ -282,14 +268,27 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @biop: pointer to anchor bio
- * @discard: discard flag
+ * @flags: controls detailed behavior
*
* Description:
- * Generate and issue number of bios with zerofiled pages.
+ * Zero-fill a block range, either using hardware offload or by explicitly
+ * writing zeroes to the device.
+ *
+ * Note that this function may fail with -EOPNOTSUPP if the driver signals
+ * zeroing offload support, but the device fails to process the command (for
+ * some devices there is no non-destructive way to verify whether this
+ * operation is actually supported). In this case the caller should retry
+ * the call to blkdev_issue_zeroout() and the fallback path will be used.
+ *
+ * If a device is using logical block provisioning, the underlying space will
+ * not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
+ *
+ * If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
+ * -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
*/
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
- bool discard)
+ unsigned flags)
{
int ret;
int bi_size = 0;
@@ -302,8 +301,8 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return -EINVAL;
ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
- biop);
- if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+ biop, flags);
+ if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
goto out;
ret = 0;
@@ -337,40 +336,23 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
- * @discard: whether to discard the block range
+ * @flags: controls detailed behavior
*
* Description:
- * Zero-fill a block range. If the discard flag is set and the block
- * device guarantees that subsequent READ operations to the block range
- * in question will return zeroes, the blocks will be discarded. Should
- * the discard request fail, if the discard flag is not set, or if
- * discard_zeroes_data is not supported, this function will resort to
- * zeroing the blocks manually, thus provisioning (allocating,
- * anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
- * command(s), blkdev_issue_zeroout() will use it to optimize the process of
- * clearing the block range. Otherwise the zeroing will be performed
- * using regular WRITE calls.
+ * Zero-fill a block range, either using hardware offload or by explicitly
+ * writing zeroes to the device. See __blkdev_issue_zeroout() for the
+ * valid values for %flags.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, bool discard)
+ sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
int ret;
struct bio *bio = NULL;
struct blk_plug plug;
- if (discard) {
- if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
- BLKDEV_DISCARD_ZERO))
- return 0;
- }
-
- if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
- ZERO_PAGE(0)))
- return 0;
-
blk_start_plug(&plug);
ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
- &bio, discard);
+ &bio, flags);
if (ret == 0 && bio) {
ret = submit_bio_wait(bio);
bio_put(bio);