summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bio.c426
-rw-r--r--block/blk-core.c34
-rw-r--r--block/blk-lib.c30
-rw-r--r--block/blk-map.c172
-rw-r--r--block/blk-merge.c41
-rw-r--r--block/blk-mq-tag.c95
-rw-r--r--block/blk-mq-tag.h5
-rw-r--r--block/blk-mq.c117
-rw-r--r--block/blk-mq.h3
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/blk-tag.c33
-rw-r--r--block/blk-timeout.c3
-rw-r--r--block/bsg.c72
-rw-r--r--block/cfq-iosched.c16
-rw-r--r--block/ioctl.c2
-rw-r--r--block/partitions/check.c12
-rw-r--r--block/partitions/efi.c2
-rw-r--r--block/scsi_ioctl.c17
18 files changed, 529 insertions, 553 deletions
diff --git a/block/bio.c b/block/bio.c
index 471d7382c7d1..f66a4eae16ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,7 +28,6 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
-#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src)
EXPORT_SYMBOL(bio_copy_data);
struct bio_map_data {
- int nr_sgvecs;
int is_our_pages;
- struct sg_iovec sgvecs[];
+ struct iov_iter iter;
+ struct iovec iov[];
};
-static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- const struct sg_iovec *iov, int iov_count,
- int is_our_pages)
-{
- memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
- bmd->nr_sgvecs = iov_count;
- bmd->is_our_pages = is_our_pages;
- bio->bi_private = bmd;
-}
-
static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
gfp_t gfp_mask)
{
@@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
return NULL;
return kmalloc(sizeof(struct bio_map_data) +
- sizeof(struct sg_iovec) * iov_count, gfp_mask);
+ sizeof(struct iovec) * iov_count, gfp_mask);
}
-static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
- int to_user, int from_user, int do_free_page)
+/**
+ * bio_copy_from_iter - copy all pages from iov_iter to bio
+ * @bio: The &struct bio which describes the I/O as destination
+ * @iter: iov_iter as source
+ *
+ * Copy all pages from iov_iter to bio.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
{
- int ret = 0, i;
+ int i;
struct bio_vec *bvec;
- int iov_idx = 0;
- unsigned int iov_off = 0;
bio_for_each_segment_all(bvec, bio, i) {
- char *bv_addr = page_address(bvec->bv_page);
- unsigned int bv_len = bvec->bv_len;
+ ssize_t ret;
- while (bv_len && iov_idx < iov_count) {
- unsigned int bytes;
- char __user *iov_addr;
+ ret = copy_page_from_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
- bytes = min_t(unsigned int,
- iov[iov_idx].iov_len - iov_off, bv_len);
- iov_addr = iov[iov_idx].iov_base + iov_off;
+ if (!iov_iter_count(&iter))
+ break;
- if (!ret) {
- if (to_user)
- ret = copy_to_user(iov_addr, bv_addr,
- bytes);
+ if (ret < bvec->bv_len)
+ return -EFAULT;
+ }
- if (from_user)
- ret = copy_from_user(bv_addr, iov_addr,
- bytes);
+ return 0;
+}
- if (ret)
- ret = -EFAULT;
- }
+/**
+ * bio_copy_to_iter - copy all pages from bio to iov_iter
+ * @bio: The &struct bio which describes the I/O as source
+ * @iter: iov_iter as destination
+ *
+ * Copy all pages from bio to iov_iter.
+ * Returns 0 on success, or error on failure.
+ */
+static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
+{
+ int i;
+ struct bio_vec *bvec;
- bv_len -= bytes;
- bv_addr += bytes;
- iov_addr += bytes;
- iov_off += bytes;
+ bio_for_each_segment_all(bvec, bio, i) {
+ ssize_t ret;
- if (iov[iov_idx].iov_len == iov_off) {
- iov_idx++;
- iov_off = 0;
- }
- }
+ ret = copy_page_to_iter(bvec->bv_page,
+ bvec->bv_offset,
+ bvec->bv_len,
+ &iter);
+
+ if (!iov_iter_count(&iter))
+ break;
- if (do_free_page)
- __free_page(bvec->bv_page);
+ if (ret < bvec->bv_len)
+ return -EFAULT;
}
- return ret;
+ return 0;
+}
+
+static void bio_free_pages(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i)
+ __free_page(bvec->bv_page);
}
/**
* bio_uncopy_user - finish previously mapped bio
* @bio: bio being terminated
*
- * Free pages allocated from bio_copy_user() and write back data
+ * Free pages allocated from bio_copy_user_iov() and write back data
* to user space in case of a read.
*/
int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
- struct bio_vec *bvec;
- int ret = 0, i;
+ int ret = 0;
if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
/*
* if we're in a workqueue, the request is orphaned, so
* don't copy into a random user address space, just free.
*/
- if (current->mm)
- ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
- bio_data_dir(bio) == READ,
- 0, bmd->is_our_pages);
- else if (bmd->is_our_pages)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
+ if (current->mm && bio_data_dir(bio) == READ)
+ ret = bio_copy_to_iter(bio, bmd->iter);
+ if (bmd->is_our_pages)
+ bio_free_pages(bio);
}
kfree(bmd);
bio_put(bio);
@@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user);
/**
* bio_copy_user_iov - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
@@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user);
*/
struct bio *bio_copy_user_iov(struct request_queue *q,
struct rq_map_data *map_data,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
struct bio_map_data *bmd;
- struct bio_vec *bvec;
struct page *page;
struct bio *bio;
int i, ret;
int nr_pages = 0;
- unsigned int len = 0;
+ unsigned int len = iter->count;
unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
- for (i = 0; i < iov_count; i++) {
+ for (i = 0; i < iter->nr_segs; i++) {
unsigned long uaddr;
unsigned long end;
unsigned long start;
- uaddr = (unsigned long)iov[i].iov_base;
- end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ uaddr = (unsigned long) iter->iov[i].iov_base;
+ end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;
/*
@@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
return ERR_PTR(-EINVAL);
nr_pages += end - start;
- len += iov[i].iov_len;
}
if (offset)
nr_pages++;
- bmd = bio_alloc_map_data(iov_count, gfp_mask);
+ bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);
+ /*
+ * We need to do a deep copy of the iov_iter including the iovecs.
+ * The caller provided iov might point to an on-stack or otherwise
+ * shortlived one.
+ */
+ bmd->is_our_pages = map_data ? 0 : 1;
+ memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
+ iov_iter_init(&bmd->iter, iter->type, bmd->iov,
+ iter->nr_segs, iter->count);
+
ret = -ENOMEM;
bio = bio_kmalloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;
- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;
ret = 0;
@@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
/*
* success
*/
- if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
+ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
(map_data && map_data->from_user)) {
- ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
+ ret = bio_copy_from_iter(bio, *iter);
if (ret)
goto cleanup;
}
- bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
+ bio->bi_private = bmd;
return bio;
cleanup:
if (!map_data)
- bio_for_each_segment_all(bvec, bio, i)
- __free_page(bvec->bv_page);
-
+ bio_free_pages(bio);
bio_put(bio);
out_bmd:
kfree(bmd);
@@ -1257,46 +1267,30 @@ out_bmd:
}
/**
- * bio_copy_user - copy user data to bio
- * @q: destination block queue
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
+ * bio_map_user_iov - map user iovec into bio
+ * @q: the struct request_queue for the bio
+ * @iter: iovec iterator
+ * @gfp_mask: memory allocation flags
*
- * Prepares and returns a bio for indirect user io, bouncing data
- * to/from kernel pages as necessary. Must be paired with
- * call bio_uncopy_user() on io completion.
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
*/
-struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
- unsigned long uaddr, unsigned int len,
- int write_to_vm, gfp_t gfp_mask)
+struct bio *bio_map_user_iov(struct request_queue *q,
+ const struct iov_iter *iter,
+ gfp_t gfp_mask)
{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_copy_user);
-
-static struct bio *__bio_map_user_iov(struct request_queue *q,
- struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- int i, j;
+ int j;
int nr_pages = 0;
struct page **pages;
struct bio *bio;
int cur_page = 0;
int ret, offset;
+ struct iov_iter i;
+ struct iovec iov;
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
@@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!pages)
goto out;
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
- unsigned long len = iov[i].iov_len;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
+ unsigned long len = iov.iov_len;
unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = uaddr >> PAGE_SHIFT;
const int local_nr_pages = end - start;
const int page_limit = cur_page + local_nr_pages;
ret = get_user_pages_fast(uaddr, local_nr_pages,
- write_to_vm, &pages[cur_page]);
+ (iter->type & WRITE) != WRITE,
+ &pages[cur_page]);
if (ret < local_nr_pages) {
ret = -EFAULT;
goto out_unmap;
@@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
/*
* set data direction, and check if mapped pages need bouncing
*/
- if (!write_to_vm)
+ if (iter->type & WRITE)
bio->bi_rw |= REQ_WRITE;
- bio->bi_bdev = bdev;
bio->bi_flags |= (1 << BIO_USER_MAPPED);
- return bio;
-
- out_unmap:
- for (i = 0; i < nr_pages; i++) {
- if(!pages[i])
- break;
- page_cache_release(pages[i]);
- }
- out:
- kfree(pages);
- bio_put(bio);
- return ERR_PTR(ret);
-}
-
-/**
- * bio_map_user - map user address into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @uaddr: start of user address
- * @len: length in bytes
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len, int write_to_vm,
- gfp_t gfp_mask)
-{
- struct sg_iovec iov;
-
- iov.iov_base = (void __user *)uaddr;
- iov.iov_len = len;
-
- return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
-}
-EXPORT_SYMBOL(bio_map_user);
-
-/**
- * bio_map_user_iov - map user sg_iovec table into bio
- * @q: the struct request_queue for the bio
- * @bdev: destination block device
- * @iov: the iovec.
- * @iov_count: number of elements in the iovec
- * @write_to_vm: bool indicating writing to pages or not
- * @gfp_mask: memory allocation flags
- *
- * Map the user space address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
- const struct sg_iovec *iov, int iov_count,
- int write_to_vm, gfp_t gfp_mask)
-{
- struct bio *bio;
-
- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
- gfp_mask);
- if (IS_ERR(bio))
- return bio;
/*
* subtle -- if __bio_map_user() ended up bouncing a bio,
@@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
* reference to it
*/
bio_get(bio);
-
return bio;
+
+ out_unmap:
+ for (j = 0; j < nr_pages; j++) {
+ if (!pages[j])
+ break;
+ page_cache_release(pages[j]);
+ }
+ out:
+ kfree(pages);
+ bio_put(bio);
+ return ERR_PTR(ret);
}
static void __bio_unmap_user(struct bio *bio)
@@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err)
bio_put(bio);
}
-static struct bio *__bio_map_kern(struct request_queue *q, void *data,
- unsigned int len, gfp_t gfp_mask)
+/**
+ * bio_map_kern - map kernel address into bio
+ * @q: the struct request_queue for the bio
+ * @data: pointer to buffer to map
+ * @len: length in bytes
+ * @gfp_mask: allocation flags for bio allocation
+ *
+ * Map the kernel address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
+ gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
bytes = len;
if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
- offset) < bytes)
- break;
+ offset) < bytes) {
+ /* we don't support partial mappings */
+ bio_put(bio);
+ return ERR_PTR(-EINVAL);
+ }
data += bytes;
len -= bytes;
@@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
bio->bi_end_io = bio_map_kern_endio;
return bio;
}
+EXPORT_SYMBOL(bio_map_kern);
-/**
- * bio_map_kern - map kernel address into bio
- * @q: the struct request_queue for the bio
- * @data: pointer to buffer to map
- * @len: length in bytes
- * @gfp_mask: allocation flags for bio allocation
- *
- * Map the kernel address into a bio suitable for io to a block
- * device. Returns an error pointer in case of error.
- */
-struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
- gfp_t gfp_mask)
+static void bio_copy_kern_endio(struct bio *bio, int err)
{
- struct bio *bio;
-
- bio = __bio_map_kern(q, data, len, gfp_mask);
- if (IS_ERR(bio))
- return bio;
-
- if (bio->bi_iter.bi_size == len)
- return bio;
-
- /*
- * Don't support partial mappings.
- */
+ bio_free_pages(bio);
bio_put(bio);
- return ERR_PTR(-EINVAL);
}
-EXPORT_SYMBOL(bio_map_kern);
-static void bio_copy_kern_endio(struct bio *bio, int err)
+static void bio_copy_kern_endio_read(struct bio *bio, int err)
{
+ char *p = bio->bi_private;
struct bio_vec *bvec;
- const int read = bio_data_dir(bio) == READ;
- struct bio_map_data *bmd = bio->bi_private;
int i;
- char *p = bmd->sgvecs[0].iov_base;
bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
-
- if (read)
- memcpy(p, addr, bvec->bv_len);
-
- __free_page(bvec->bv_page);
+ memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
p += bvec->bv_len;
}
- kfree(bmd);
- bio_put(bio);
+ bio_copy_kern_endio(bio, err);
}
/**
@@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask, int reading)
{
+ unsigned long kaddr = (unsigned long)data;
+ unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = kaddr >> PAGE_SHIFT;
struct bio *bio;
- struct bio_vec *bvec;
- int i;
+ void *p = data;
+ int nr_pages = 0;
+
+ /*
+ * Overflow, abort
+ */
+ if (end < start)
+ return ERR_PTR(-EINVAL);
- bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
- if (IS_ERR(bio))
- return bio;
+ nr_pages = end - start;
+ bio = bio_kmalloc(gfp_mask, nr_pages);
+ if (!bio)
+ return ERR_PTR(-ENOMEM);
- if (!reading) {
- void *p = data;
+ while (len) {
+ struct page *page;
+ unsigned int bytes = PAGE_SIZE;
- bio_for_each_segment_all(bvec, bio, i) {
- char *addr = page_address(bvec->bv_page);
+ if (bytes > len)
+ bytes = len;
- memcpy(addr, p, bvec->bv_len);
- p += bvec->bv_len;
- }
+ page = alloc_page(q->bounce_gfp | gfp_mask);
+ if (!page)
+ goto cleanup;
+
+ if (!reading)
+ memcpy(page_address(page), p, bytes);
+
+ if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ break;
+
+ len -= bytes;
+ p += bytes;
}
- bio->bi_end_io = bio_copy_kern_endio;
+ if (reading) {
+ bio->bi_end_io = bio_copy_kern_endio_read;
+ bio->bi_private = data;
+ } else {
+ bio->bi_end_io = bio_copy_kern_endio;
+ bio->bi_rw |= REQ_WRITE;
+ }
return bio;
+
+cleanup:
+ bio_free_pages(bio);
+ bio_put(bio);
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_copy_kern);
diff --git a/block/blk-core.c b/block/blk-core.c
index 30f6153a40c2..794c3e7f01cf 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,25 @@ void blk_queue_bypass_end(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
+void blk_set_queue_dying(struct request_queue *q)
+{
+ queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+
+ if (q->mq_ops)
+ blk_mq_wake_waiters(q);
+ else {
+ struct request_list *rl;
+
+ blk_queue_for_each_rl(rl, q) {
+ if (rl->rq_pool) {
+ wake_up(&rl->wait[BLK_RW_SYNC]);
+ wake_up(&rl->wait[BLK_RW_ASYNC]);
+ }
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+
/**
* blk_cleanup_queue - shutdown a request queue
* @q: request queue to shutdown
@@ -486,7 +505,7 @@ void blk_cleanup_queue(struct request_queue *q)
/* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
- queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+ blk_set_queue_dying(q);
spin_lock_irq(lock);
/*
@@ -588,7 +607,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
q->backing_dev_info.state = 0;
- q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
+ q->backing_dev_info.capabilities = 0;
q->backing_dev_info.name = "block";
q->node = node_id;
@@ -2029,6 +2048,13 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return -EIO;
+ if (q->mq_ops) {
+ if (blk_queue_io_stat(q))
+ blk_account_io_start(rq, true);
+ blk_mq_insert_request(rq, false, true, true);
+ return 0;
+ }
+
spin_lock_irqsave(q->queue_lock, flags);
if (unlikely(blk_queue_dying(q))) {
spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2888,7 +2914,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
dst->cpu = src->cpu;
- dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+ dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
dst->cmd_type = src->cmd_type;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
@@ -2926,8 +2952,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
if (!bs)
bs = fs_bio_set;
- blk_rq_init(NULL, rq);
-
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_clone_fast(bio_src, gfp_mask, bs);
if (!bio)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 8411be3c19d3..7688ee3f5d72 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -283,24 +283,34 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
+ * @discard: whether to discard the block range
*
* Description:
- * Generate and issue number of bios with zerofiled pages.
+ * Zero-fill a block range. If the discard flag is set and the block
+ * device guarantees that subsequent READ operations to the block range
+ * in question will return zeroes, the blocks will be discarded. Should
+ * the discard request fail, if the discard flag is not set, or if
+ * discard_zeroes_data is not supported, this function will resort to
+ * zeroing the blocks manually, thus provisioning (allocating,
+ * anchoring) them. If the block device supports the WRITE SAME command
+ * blkdev_issue_zeroout() will use it to optimize the process of
+ * clearing the block range. Otherwise the zeroing will be performed
+ * using regular WRITE calls.
*/
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask)
+ sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
- if (bdev_write_same(bdev)) {
- unsigned char bdn[BDEVNAME_SIZE];
+ struct request_queue *q = bdev_get_queue(bdev);
- if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
- ZERO_PAGE(0)))
- return 0;
+ if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data &&
+ blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0)
+ return 0;
- bdevname(bdev, bdn);
- pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
- }
+ if (bdev_write_same(bdev) &&
+ blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+ ZERO_PAGE(0)) == 0)
+ return 0;
return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
diff --git a/block/blk-map.c b/block/blk-map.c
index f890d4345b0c..b8d2725324a6 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <scsi/sg.h> /* for struct sg_iovec */
+#include <linux/uio.h>
#include "blk.h"
@@ -39,138 +39,12 @@ static int __blk_rq_unmap_user(struct bio *bio)
return ret;
}
-static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
- struct rq_map_data *map_data, void __user *ubuf,
- unsigned int len, gfp_t gfp_mask)
-{
- unsigned long uaddr;
- struct bio *bio, *orig_bio;
- int reading, ret;
-
- reading = rq_data_dir(rq) == READ;
-
- /*
- * if alignment requirement is satisfied, map in user pages for
- * direct dma. else, set up kernel bounce buffers
- */
- uaddr = (unsigned long) ubuf;
- if (blk_rq_aligned(q, uaddr, len) && !map_data)
- bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
- else
- bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
-
- if (IS_ERR(bio))
- return PTR_ERR(bio);
-
- if (map_data && map_data->null_mapped)
- bio->bi_flags |= (1 << BIO_NULL_MAPPED);
-
- orig_bio = bio;
- blk_queue_bounce(q, &bio);
-
- /*
- * We link the bounce buffer in and could have to traverse it
- * later so we have to get a ref to prevent it from being freed
- */
- bio_get(bio);
-
- ret = blk_rq_append_bio(q, rq, bio);
- if (!ret)
- return bio->bi_iter.bi_size;
-
- /* if it was boucned we must call the end io function */
- bio_endio(bio, 0);
- __blk_rq_unmap_user(orig_bio);
- bio_put(bio);
- return ret;
-}
-
-/**
- * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
- * @q: request queue where request should be inserted
- * @rq: request structure to fill
- * @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @ubuf: the user buffer
- * @len: length of user data
- * @gfp_mask: memory allocation flags
- *
- * Description:
- * Data will be mapped directly for zero copy I/O, if possible. Otherwise
- * a kernel bounce buffer is used.
- *
- * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
- * still in process context.
- *
- * Note: The mapped bio may need to be bounced through blk_queue_bounce()
- * before being submitted to the device, as pages mapped may be out of
- * reach. It's the callers responsibility to make sure this happens. The
- * original bio must be passed back in to blk_rq_unmap_user() for proper
- * unmapping.
- */
-int blk_rq_map_user(struct request_queue *q, struct request *rq,
- struct rq_map_data *map_data, void __user *ubuf,
- unsigned long len, gfp_t gfp_mask)
-{
- unsigned long bytes_read = 0;
- struct bio *bio = NULL;
- int ret;
-
- if (len > (queue_max_hw_sectors(q) << 9))
- return -EINVAL;
- if (!len)
- return -EINVAL;
-
- if (!ubuf && (!map_data || !map_data->null_mapped))
- return -EINVAL;
-
- while (bytes_read != len) {
- unsigned long map_len, end, start;
-
- map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
- end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
- >> PAGE_SHIFT;
- start = (unsigned long)ubuf >> PAGE_SHIFT;
-
- /*
- * A bad offset could cause us to require BIO_MAX_PAGES + 1
- * pages. If this happens we just lower the requested
- * mapping len by a page so that we can fit
- */
- if (end - start > BIO_MAX_PAGES)
- map_len -= PAGE_SIZE;
-
- ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
- gfp_mask);
- if (ret < 0)
- goto unmap_rq;
- if (!bio)
- bio = rq->bio;
- bytes_read += ret;
- ubuf += ret;
-
- if (map_data)
- map_data->offset += ret;
- }
-
- if (!bio_flagged(bio, BIO_USER_MAPPED))
- rq->cmd_flags |= REQ_COPY_USER;
-
- return 0;
-unmap_rq:
- blk_rq_unmap_user(bio);
- rq->bio = NULL;
- return ret;
-}
-EXPORT_SYMBOL(blk_rq_map_user);
-
/**
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
* @rq: request to map data to
* @map_data: pointer to the rq_map_data holding pages (if necessary)
- * @iov: pointer to the iovec
- * @iov_count: number of elements in the iovec
- * @len: I/O byte count
+ * @iter: iovec iterator
* @gfp_mask: memory allocation flags
*
* Description:
@@ -187,20 +61,21 @@ EXPORT_SYMBOL(blk_rq_map_user);
* unmapping.
*/
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
- struct rq_map_data *map_data, const struct sg_iovec *iov,
- int iov_count, unsigned int len, gfp_t gfp_mask)
+ struct rq_map_data *map_data,
+ const struct iov_iter *iter, gfp_t gfp_mask)
{
struct bio *bio;
- int i, read = rq_data_dir(rq) == READ;
int unaligned = 0;
+ struct iov_iter i;
+ struct iovec iov;
- if (!iov || iov_count <= 0)
+ if (!iter || !iter->count)
return -EINVAL;
- for (i = 0; i < iov_count; i++) {
- unsigned long uaddr = (unsigned long)iov[i].iov_base;
+ iov_for_each(iov, i, *iter) {
+ unsigned long uaddr = (unsigned long) iov.iov_base;
- if (!iov[i].iov_len)
+ if (!iov.iov_len)
return -EINVAL;
/*
@@ -210,16 +85,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
unaligned = 1;
}
- if (unaligned || (q->dma_pad_mask & len) || map_data)
- bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
- gfp_mask);
+ if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
+ bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
else
- bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
+ bio = bio_map_user_iov(q, iter, gfp_mask);
if (IS_ERR(bio))
return PTR_ERR(bio);
- if (bio->bi_iter.bi_size != len) {
+ if (map_data && map_data->null_mapped)
+ bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+
+ if (bio->bi_iter.bi_size != iter->count) {
/*
* Grab an extra reference to this bio, as bio_unmap_user()
* expects to be able to drop it twice as it happens on the
@@ -241,6 +118,21 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
+int blk_rq_map_user(struct request_queue *q, struct request *rq,
+ struct rq_map_data *map_data, void __user *ubuf,
+ unsigned long len, gfp_t gfp_mask)
+{
+ struct iovec iov;
+ struct iov_iter i;
+
+ iov.iov_base = ubuf;
+ iov.iov_len = len;
+ iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len);
+
+ return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
+}
+EXPORT_SYMBOL(blk_rq_map_user);
+
/**
* blk_rq_unmap_user - unmap a request with user data
* @bio: start of bio list
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 89b97b5e0881..fc1ff3b1ea1f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -283,35 +283,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(blk_rq_map_sg);
-/**
- * blk_bio_map_sg - map a bio to a scatterlist
- * @q: request_queue in question
- * @bio: bio being mapped
- * @sglist: scatterlist being mapped
- *
- * Note:
- * Caller must make sure sg can hold bio->bi_phys_segments entries
- *
- * Will return the number of sg entries setup
- */
-int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
- struct scatterlist *sglist)
-{
- struct scatterlist *sg = NULL;
- int nsegs;
- struct bio *next = bio->bi_next;
- bio->bi_next = NULL;
-
- nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
- bio->bi_next = next;
- if (sg)
- sg_mark_end(sg);
-
- BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
- return nsegs;
-}
-EXPORT_SYMBOL(blk_bio_map_sg);
-
static inline int ll_new_hw_segment(struct request_queue *q,
struct request *req,
struct bio *bio)
@@ -385,6 +356,14 @@ static bool req_no_special_merge(struct request *req)
return !q->mq_ops && req->special;
}
+static int req_gap_to_prev(struct request *req, struct request *next)
+{
+ struct bio *prev = req->biotail;
+
+ return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1],
+ next->bio->bi_io_vec[0].bv_offset);
+}
+
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
@@ -399,6 +378,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
if (req_no_special_merge(req) || req_no_special_merge(next))
return 0;
+ if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) &&
+ req_gap_to_prev(req, next))
+ return 0;
+
/*
* Will it become too large?
*/
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 32e8dbb9ad1c..d53a764b05ea 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -68,9 +68,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
}
/*
- * Wakeup all potentially sleeping on normal (non-reserved) tags
+ * Wakeup all potentially sleeping on tags
*/
-static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
+void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
struct blk_mq_bitmap_tags *bt;
int i, wake_index;
@@ -85,6 +85,12 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
wake_index = bt_index_inc(wake_index);
}
+
+ if (include_reserve) {
+ bt = &tags->breserved_tags;
+ if (waitqueue_active(&bt->bs[0].wait))
+ wake_up(&bt->bs[0].wait);
+ }
}
/*
@@ -100,7 +106,7 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
atomic_dec(&tags->active_queues);
- blk_mq_tag_wakeup_all(tags);
+ blk_mq_tag_wakeup_all(tags, false);
}
/*
@@ -134,35 +140,39 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth;
}
-static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag,
+ bool nowrap)
{
- int tag, org_last_tag, end;
- bool wrap = last_tag != 0;
+ int tag, org_last_tag = last_tag;
- org_last_tag = last_tag;
- end = bm->depth;
- do {
-restart:
- tag = find_next_zero_bit(&bm->word, end, last_tag);
- if (unlikely(tag >= end)) {
+ while (1) {
+ tag = find_next_zero_bit(&bm->word, bm->depth, last_tag);
+ if (unlikely(tag >= bm->depth)) {
/*
- * We started with an offset, start from 0 to
+ * We started with an offset, and we didn't reset the
+ * offset to 0 in a failure case, so start from 0 to
* exhaust the map.
*/
- if (wrap) {
- wrap = false;
- end = org_last_tag;
- last_tag = 0;
- goto restart;
+ if (org_last_tag && last_tag && !nowrap) {
+ last_tag = org_last_tag = 0;
+ continue;
}
return -1;
}
+
+ if (!test_and_set_bit(tag, &bm->word))
+ break;
+
last_tag = tag + 1;
- } while (test_and_set_bit(tag, &bm->word));
+ if (last_tag >= bm->depth - 1)
+ last_tag = 0;
+ }
return tag;
}
+#define BT_ALLOC_RR(tags) (tags->alloc_policy == BLK_TAG_ALLOC_RR)
+
/*
* Straight forward bitmap tag implementation, where each bit is a tag
* (cleared == free, and set == busy). The small twist is using per-cpu
@@ -175,7 +185,7 @@ restart:
* until the map is exhausted.
*/
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
- unsigned int *tag_cache)
+ unsigned int *tag_cache, struct blk_mq_tags *tags)
{
unsigned int last_tag, org_last_tag;
int index, i, tag;
@@ -187,15 +197,24 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
index = TAG_TO_INDEX(bt, last_tag);
for (i = 0; i < bt->map_nr; i++) {
- tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+ tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag),
+ BT_ALLOC_RR(tags));
if (tag != -1) {
tag += (index << bt->bits_per_word);
goto done;
}
- last_tag = 0;
- if (++index >= bt->map_nr)
+ /*
+ * Jump to next index, and reset the last tag to be the
+ * first tag of that index
+ */
+ index++;
+ last_tag = (index << bt->bits_per_word);
+
+ if (index >= bt->map_nr) {
index = 0;
+ last_tag = 0;
+ }
}
*tag_cache = 0;
@@ -206,7 +225,7 @@ static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
* up using the specific cached tag.
*/
done:
- if (tag == org_last_tag) {
+ if (tag == org_last_tag || unlikely(BT_ALLOC_RR(tags))) {
last_tag = tag + 1;
if (last_tag >= bt->depth - 1)
last_tag = 0;
@@ -235,13 +254,13 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
static int bt_get(struct blk_mq_alloc_data *data,
struct blk_mq_bitmap_tags *bt,
struct blk_mq_hw_ctx *hctx,
- unsigned int *last_tag)
+ unsigned int *last_tag, struct blk_mq_tags *tags)
{
struct bt_wait_state *bs;
DEFINE_WAIT(wait);
int tag;
- tag = __bt_get(hctx, bt, last_tag);
+ tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
return tag;
@@ -252,7 +271,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
do {
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
- tag = __bt_get(hctx, bt, last_tag);
+ tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
break;
@@ -267,7 +286,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
* Retry tag allocation after running the hardware queue,
* as running the queue may also have found completions.
*/
- tag = __bt_get(hctx, bt, last_tag);
+ tag = __bt_get(hctx, bt, last_tag, tags);
if (tag != -1)
break;
@@ -298,7 +317,7 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
int tag;
tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
- &data->ctx->last_tag);
+ &data->ctx->last_tag, data->hctx->tags);
if (tag >= 0)
return tag + data->hctx->tags->nr_reserved_tags;
@@ -314,7 +333,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
return BLK_MQ_TAG_FAIL;
}
- tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero);
+ tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, &zero,
+ data->hctx->tags);
if (tag < 0)
return BLK_MQ_TAG_FAIL;
@@ -386,7 +406,8 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
BUG_ON(real_tag >= tags->nr_tags);
bt_clear_tag(&tags->bitmap_tags, real_tag);
- *last_tag = real_tag;
+ if (likely(tags->alloc_policy == BLK_TAG_ALLOC_FIFO))
+ *last_tag = real_tag;
} else {
BUG_ON(tag >= tags->nr_reserved_tags);
bt_clear_tag(&tags->breserved_tags, tag);
@@ -503,6 +524,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
if (!bt->bs) {
kfree(bt->map);
+ bt->map = NULL;
return -ENOMEM;
}
@@ -523,10 +545,12 @@ static void bt_free(struct blk_mq_bitmap_tags *bt)
}
static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
- int node)
+ int node, int alloc_policy)
{
unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
+ tags->alloc_policy = alloc_policy;
+
if (bt_alloc(&tags->bitmap_tags, depth, node, false))
goto enomem;
if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
@@ -540,7 +564,8 @@ enomem:
}
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
- unsigned int reserved_tags, int node)
+ unsigned int reserved_tags,
+ int node, int alloc_policy)
{
struct blk_mq_tags *tags;
@@ -556,7 +581,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags;
- return blk_mq_init_bitmap_tags(tags, node);
+ return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}
void blk_mq_free_tags(struct blk_mq_tags *tags)
@@ -584,7 +609,7 @@ int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
* static and should never need resizing.
*/
bt_update_count(&tags->bitmap_tags, tdepth);
- blk_mq_tag_wakeup_all(tags);
+ blk_mq_tag_wakeup_all(tags, false);
return 0;
}
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 6206ed17ef76..90767b370308 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,10 +42,12 @@ struct blk_mq_tags {
struct request **rqs;
struct list_head page_list;
+
+ int alloc_policy;
};
-extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
+extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node, int alloc_policy);
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
@@ -54,6 +56,7 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
enum {
BLK_MQ_TAG_CACHE_MIN = 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da1ab5641227..4f4bea21052e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -33,6 +33,7 @@ static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
+static void blk_mq_run_queues(struct request_queue *q);
/*
* Check if any of the ctx's have pending work in this hardware queue
@@ -107,7 +108,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
wake_up_all(&q->mq_freeze_wq);
}
-static void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_mq_freeze_queue_start(struct request_queue *q)
{
bool freeze;
@@ -117,9 +118,10 @@ static void blk_mq_freeze_queue_start(struct request_queue *q)
if (freeze) {
percpu_ref_kill(&q->mq_usage_counter);
- blk_mq_run_queues(q, false);
+ blk_mq_run_queues(q);
}
}
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
static void blk_mq_freeze_queue_wait(struct request_queue *q)
{
@@ -135,8 +137,9 @@ void blk_mq_freeze_queue(struct request_queue *q)
blk_mq_freeze_queue_start(q);
blk_mq_freeze_queue_wait(q);
}
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
-static void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue(struct request_queue *q)
{
bool wake;
@@ -149,6 +152,24 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
wake_up_all(&q->mq_freeze_wq);
}
}
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+
+void blk_mq_wake_waiters(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+
+ queue_for_each_hw_ctx(q, hctx, i)
+ if (blk_mq_hw_queue_mapped(hctx))
+ blk_mq_tag_wakeup_all(hctx->tags, true);
+
+ /*
+ * If we are called because the queue has now been marked as
+ * dying, we need to ensure that processes currently waiting on
+ * the queue are notified as well.
+ */
+ wake_up_all(&q->mq_freeze_wq);
+}
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
{
@@ -258,8 +279,10 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
ctx = alloc_data.ctx;
}
blk_mq_put_ctx(ctx);
- if (!rq)
+ if (!rq) {
+ blk_mq_queue_exit(q);
return ERR_PTR(-EWOULDBLOCK);
+ }
return rq;
}
EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -383,6 +406,12 @@ void blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
+int blk_mq_request_started(struct request *rq)
+{
+ return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_started);
+
void blk_mq_start_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -500,12 +529,38 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
}
EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
+void blk_mq_cancel_requeue_work(struct request_queue *q)
+{
+ cancel_work_sync(&q->requeue_work);
+}
+EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+
void blk_mq_kick_requeue_list(struct request_queue *q)
{
kblockd_schedule_work(&q->requeue_work);
}
EXPORT_SYMBOL(blk_mq_kick_requeue_list);
+void blk_mq_abort_requeue_list(struct request_queue *q)
+{
+ unsigned long flags;
+ LIST_HEAD(rq_list);
+
+ spin_lock_irqsave(&q->requeue_lock, flags);
+ list_splice_init(&q->requeue_list, &rq_list);
+ spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+ while (!list_empty(&rq_list)) {
+ struct request *rq;
+
+ rq = list_first_entry(&rq_list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ rq->errors = -EIO;
+ blk_mq_end_request(rq, rq->errors);
+ }
+}
+EXPORT_SYMBOL(blk_mq_abort_requeue_list);
+
static inline bool is_flush_request(struct request *rq,
struct blk_flush_queue *fq, unsigned int tag)
{
@@ -566,13 +621,24 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
break;
}
}
-
+
static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
struct blk_mq_timeout_data *data = priv;
- if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+ if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+ /*
+ * If a request wasn't started before the queue was
+ * marked dying, kill it here or it'll go unnoticed.
+ */
+ if (unlikely(blk_queue_dying(rq->q))) {
+ rq->errors = -EIO;
+ blk_mq_complete_request(rq);
+ }
+ return;
+ }
+ if (rq->cmd_flags & REQ_NO_TIMEOUT)
return;
if (time_after_eq(jiffies, rq->deadline)) {
@@ -838,7 +904,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
&hctx->run_work, 0);
}
-void blk_mq_run_queues(struct request_queue *q, bool async)
+static void blk_mq_run_queues(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
@@ -849,10 +915,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
test_bit(BLK_MQ_S_STOPPED, &hctx->state))
continue;
- blk_mq_run_hw_queue(hctx, async);
+ blk_mq_run_hw_queue(hctx, false);
}
}
-EXPORT_SYMBOL(blk_mq_run_queues);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
@@ -890,7 +955,6 @@ void blk_mq_start_hw_queues(struct request_queue *q)
}
EXPORT_SYMBOL(blk_mq_start_hw_queues);
-
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
{
struct blk_mq_hw_ctx *hctx;
@@ -1359,7 +1423,8 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
size_t rq_size, left;
tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
- set->numa_node);
+ set->numa_node,
+ BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
if (!tags)
return NULL;
@@ -1577,10 +1642,8 @@ static void blk_mq_free_hw_queues(struct request_queue *q,
struct blk_mq_hw_ctx *hctx;
unsigned int i;
- queue_for_each_hw_ctx(q, hctx, i) {
+ queue_for_each_hw_ctx(q, hctx, i)
free_cpumask_var(hctx->cpumask);
- kfree(hctx);
- }
}
static int blk_mq_init_hctx(struct request_queue *q,
@@ -1601,7 +1664,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
hctx->queue = q;
hctx->queue_num = hctx_idx;
hctx->flags = set->flags;
- hctx->cmd_size = set->cmd_size;
blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
blk_mq_hctx_notify, hctx);
@@ -1806,6 +1868,27 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
mutex_unlock(&set->tag_list_lock);
}
+/*
+ * It is the actual release handler for mq, but we do it from
+ * request queue's release handler for avoiding use-after-free
+ * and headache because q->mq_kobj shouldn't have been introduced,
+ * but we can't group ctx/kctx kobj without it.
+ */
+void blk_mq_release(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+
+ /* hctx kobj stays in hctx */
+ queue_for_each_hw_ctx(q, hctx, i)
+ kfree(hctx);
+
+ kfree(q->queue_hw_ctx);
+
+ /* ctx kobj stays in queue_ctx */
+ free_percpu(q->queue_ctx);
+}
+
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct blk_mq_hw_ctx **hctxs;
@@ -1939,12 +2022,8 @@ void blk_mq_free_queue(struct request_queue *q)
percpu_ref_exit(&q->mq_usage_counter);
- free_percpu(q->queue_ctx);
- kfree(q->queue_hw_ctx);
kfree(q->mq_map);
- q->queue_ctx = NULL;
- q->queue_hw_ctx = NULL;
q->mq_map = NULL;
mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 206230e64f79..6a48c4c0d8a2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_free_queue(struct request_queue *q);
void blk_mq_clone_flush_request(struct request *flush_rq,
struct request *orig_rq);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
+void blk_mq_wake_waiters(struct request_queue *q);
/*
* CPU hotplug helpers
@@ -61,6 +62,8 @@ extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
+void blk_mq_release(struct request_queue *q);
+
/*
* Basic implementation of sparser bitmap, allowing the user to spread
* the bits over more cachelines.
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 935ea2aa0730..faaf36ade7eb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -517,6 +517,8 @@ static void blk_release_queue(struct kobject *kobj)
if (!q->mq_ops)
blk_free_flush_queue(q->fq);
+ else
+ blk_mq_release(q);
blk_trace_shutdown(q);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index a185b86741e5..f0344e6939d5 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -119,7 +119,7 @@ fail:
}
static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
- int depth)
+ int depth, int alloc_policy)
{
struct blk_queue_tag *tags;
@@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
goto fail;
atomic_set(&tags->refcnt, 1);
+ tags->alloc_policy = alloc_policy;
+ tags->next_tag = 0;
return tags;
fail:
kfree(tags);
@@ -140,10 +142,11 @@ fail:
/**
* blk_init_tags - initialize the tag info for an external tag map
* @depth: the maximum queue depth supported
+ * @alloc_policy: tag allocation policy
**/
-struct blk_queue_tag *blk_init_tags(int depth)
+struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy)
{
- return __blk_queue_init_tags(NULL, depth);
+ return __blk_queue_init_tags(NULL, depth, alloc_policy);
}
EXPORT_SYMBOL(blk_init_tags);
@@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags);
* @q: the request queue for the device
* @depth: the maximum queue depth supported
* @tags: the tag to use
+ * @alloc_policy: tag allocation policy
*
* Queue lock must be held here if the function is called to resize an
* existing map.
**/
int blk_queue_init_tags(struct request_queue *q, int depth,
- struct blk_queue_tag *tags)
+ struct blk_queue_tag *tags, int alloc_policy)
{
int rc;
BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
if (!tags && !q->queue_tags) {
- tags = __blk_queue_init_tags(q, depth);
+ tags = __blk_queue_init_tags(q, depth, alloc_policy);
if (!tags)
return -ENOMEM;
@@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
}
do {
- tag = find_first_zero_bit(bqt->tag_map, max_depth);
- if (tag >= max_depth)
- return 1;
+ if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) {
+ tag = find_first_zero_bit(bqt->tag_map, max_depth);
+ if (tag >= max_depth)
+ return 1;
+ } else {
+ int start = bqt->next_tag;
+ int size = min_t(int, bqt->max_depth, max_depth + start);
+ tag = find_next_zero_bit(bqt->tag_map, size, start);
+ if (tag >= size && start + size > bqt->max_depth) {
+ size = start + size - bqt->max_depth;
+ tag = find_first_zero_bit(bqt->tag_map, size);
+ }
+ if (tag >= size)
+ return 1;
+ }
} while (test_and_set_bit_lock(tag, bqt->tag_map));
/*
@@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
* See blk_queue_end_tag for details.
*/
+ bqt->next_tag = (tag + 1) % bqt->max_depth;
rq->cmd_flags |= REQ_QUEUED;
rq->tag = tag;
bqt->tag_index[tag] = rq;
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 56c025894cdf..246dfb16c3d9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -190,6 +190,9 @@ void blk_add_timer(struct request *req)
struct request_queue *q = req->q;
unsigned long expiry;
+ if (req->cmd_flags & REQ_NO_TIMEOUT)
+ return;
+
/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
if (!q->mq_ops && !q->rq_timed_out_fn)
return;
diff --git a/block/bsg.c b/block/bsg.c
index 276e869e686c..d214e929ce18 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -136,42 +136,6 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
}
-static int bsg_io_schedule(struct bsg_device *bd)
-{
- DEFINE_WAIT(wait);
- int ret = 0;
-
- spin_lock_irq(&bd->lock);
-
- BUG_ON(bd->done_cmds > bd->queued_cmds);
-
- /*
- * -ENOSPC or -ENODATA? I'm going for -ENODATA, meaning "I have no
- * work to do", even though we return -ENOSPC after this same test
- * during bsg_write() -- there, it means our buffer can't have more
- * bsg_commands added to it, thus has no space left.
- */
- if (bd->done_cmds == bd->queued_cmds) {
- ret = -ENODATA;
- goto unlock;
- }
-
- if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
- ret = -EAGAIN;
- goto unlock;
- }
-
- prepare_to_wait(&bd->wq_done, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&bd->lock);
- io_schedule();
- finish_wait(&bd->wq_done, &wait);
-
- return ret;
-unlock:
- spin_unlock_irq(&bd->lock);
- return ret;
-}
-
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_v4 *hdr, struct bsg_device *bd,
fmode_t has_write_perm)
@@ -482,6 +446,30 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
return ret;
}
+static bool bsg_complete(struct bsg_device *bd)
+{
+ bool ret = false;
+ bool spin;
+
+ do {
+ spin_lock_irq(&bd->lock);
+
+ BUG_ON(bd->done_cmds > bd->queued_cmds);
+
+ /*
+ * All commands consumed.
+ */
+ if (bd->done_cmds == bd->queued_cmds)
+ ret = true;
+
+ spin = !test_bit(BSG_F_BLOCK, &bd->flags);
+
+ spin_unlock_irq(&bd->lock);
+ } while (!ret && spin);
+
+ return ret;
+}
+
static int bsg_complete_all_commands(struct bsg_device *bd)
{
struct bsg_command *bc;
@@ -492,17 +480,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd)
/*
* wait for all commands to complete
*/
- ret = 0;
- do {
- ret = bsg_io_schedule(bd);
- /*
- * look for -ENODATA specifically -- we'll sometimes get
- * -ERESTARTSYS when we've taken a signal, but we can't
- * return until we're done freeing the queue, so ignore
- * it. The signal will get handled when we're done freeing
- * the bsg_device.
- */
- } while (ret != -ENODATA);
+ io_wait_event(bd->wq_done, bsg_complete(bd));
/*
* discard done commands
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6f2751d305de..5da8e6e9ab4b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3590,6 +3590,11 @@ retry:
blkcg = bio_blkcg(bio);
cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
+ if (!cfqg) {
+ cfqq = &cfqd->oom_cfqq;
+ goto out;
+ }
+
cfqq = cic_to_cfqq(cic, is_sync);
/*
@@ -3626,7 +3631,7 @@ retry:
} else
cfqq = &cfqd->oom_cfqq;
}
-
+out:
if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);
@@ -3656,12 +3661,17 @@ static struct cfq_queue *
cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
struct bio *bio, gfp_t gfp_mask)
{
- const int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
- const int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
+ int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
+ int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
struct cfq_queue **async_cfqq = NULL;
struct cfq_queue *cfqq = NULL;
if (!is_sync) {
+ if (!ioprio_valid(cic->ioprio)) {
+ struct task_struct *tsk = current;
+ ioprio = task_nice_ioprio(tsk);
+ ioprio_class = task_nice_ioclass(tsk);
+ }
async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
cfqq = *async_cfqq;
}
diff --git a/block/ioctl.c b/block/ioctl.c
index 6c7bf903742f..7d8befde2aca 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
if (start + len > (i_size_read(bdev->bd_inode) >> 9))
return -EINVAL;
- return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+ return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
}
static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/block/partitions/check.c b/block/partitions/check.c
index 9ac1df74f699..16118d11dbfc 100644
--- a/block/partitions/check.c
+++ b/block/partitions/check.c
@@ -184,12 +184,12 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
if (err)
/* The partition is unrecognized. So report I/O errors if there were any */
res = err;
- if (!res)
- strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
- else if (warn_no_part)
- strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
-
- printk(KERN_INFO "%s", state->pp_buf);
+ if (res) {
+ if (warn_no_part)
+ strlcat(state->pp_buf,
+ " unable to read partition table\n", PAGE_SIZE);
+ printk(KERN_INFO "%s", state->pp_buf);
+ }
free_page((unsigned long)state->pp_buf);
free_partitions(state);
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 56d08fd75b1a..26cb624ace05 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -715,7 +715,7 @@ int efi_partition(struct parsed_partitions *state)
state->parts[i + 1].flags = ADDPART_FLAG_RAID;
info = &state->parts[i + 1].info;
- efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
+ efi_guid_to_str(&ptes[i].unique_partition_guid, info->uuid);
/* Naively convert UTF16-LE to 7 bits. */
label_max = min(ARRAY_SIZE(info->volname) - 1,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 28163fad3c5d..e1f71c396193 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
ret = 0;
if (hdr->iovec_count) {
- size_t iov_data_len;
+ struct iov_iter i;
struct iovec *iov = NULL;
ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
@@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
goto out_free_cdb;
}
- iov_data_len = ret;
- ret = 0;
-
/* SG_IO howto says that the shorter of the two wins */
- if (hdr->dxfer_len < iov_data_len) {
- hdr->iovec_count = iov_shorten(iov,
- hdr->iovec_count,
- hdr->dxfer_len);
- iov_data_len = hdr->dxfer_len;
- }
+ iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count,
+ min_t(unsigned, ret, hdr->dxfer_len));
- ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov,
- hdr->iovec_count,
- iov_data_len, GFP_KERNEL);
+ ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL);
kfree(iov);
} else if (hdr->dxfer_len)
ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,