diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio.c | 426 | ||||
-rw-r--r-- | block/blk-core.c | 13 | ||||
-rw-r--r-- | block/blk-lib.c | 26 | ||||
-rw-r--r-- | block/blk-map.c | 172 | ||||
-rw-r--r-- | block/blk-merge.c | 41 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 1 | ||||
-rw-r--r-- | block/blk-mq.c | 93 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/blk-timeout.c | 3 | ||||
-rw-r--r-- | block/cfq-iosched.c | 7 | ||||
-rw-r--r-- | block/partitions/efi.c | 2 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 17 |
13 files changed, 348 insertions, 457 deletions
diff --git a/block/bio.c b/block/bio.c index 471d7382c7d1..f66a4eae16ee 100644 --- a/block/bio.c +++ b/block/bio.c @@ -28,7 +28,6 @@ #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/cgroup.h> -#include <scsi/sg.h> /* for struct sg_iovec */ #include <trace/events/block.h> @@ -1022,21 +1021,11 @@ void bio_copy_data(struct bio *dst, struct bio *src) EXPORT_SYMBOL(bio_copy_data); struct bio_map_data { - int nr_sgvecs; int is_our_pages; - struct sg_iovec sgvecs[]; + struct iov_iter iter; + struct iovec iov[]; }; -static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - const struct sg_iovec *iov, int iov_count, - int is_our_pages) -{ - memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); - bmd->nr_sgvecs = iov_count; - bmd->is_our_pages = is_our_pages; - bio->bi_private = bmd; -} - static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, gfp_t gfp_mask) { @@ -1044,85 +1033,101 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, return NULL; return kmalloc(sizeof(struct bio_map_data) + - sizeof(struct sg_iovec) * iov_count, gfp_mask); + sizeof(struct iovec) * iov_count, gfp_mask); } -static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, - int to_user, int from_user, int do_free_page) +/** + * bio_copy_from_iter - copy all pages from iov_iter to bio + * @bio: The &struct bio which describes the I/O as destination + * @iter: iov_iter as source + * + * Copy all pages from iov_iter to bio. + * Returns 0 on success, or error on failure. + */ +static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) { - int ret = 0, i; + int i; struct bio_vec *bvec; - int iov_idx = 0; - unsigned int iov_off = 0; bio_for_each_segment_all(bvec, bio, i) { - char *bv_addr = page_address(bvec->bv_page); - unsigned int bv_len = bvec->bv_len; + ssize_t ret; - while (bv_len && iov_idx < iov_count) { - unsigned int bytes; - char __user *iov_addr; + ret = copy_page_from_iter(bvec->bv_page, + bvec->bv_offset, + bvec->bv_len, + &iter); - bytes = min_t(unsigned int, - iov[iov_idx].iov_len - iov_off, bv_len); - iov_addr = iov[iov_idx].iov_base + iov_off; + if (!iov_iter_count(&iter)) + break; - if (!ret) { - if (to_user) - ret = copy_to_user(iov_addr, bv_addr, - bytes); + if (ret < bvec->bv_len) + return -EFAULT; + } - if (from_user) - ret = copy_from_user(bv_addr, iov_addr, - bytes); + return 0; +} - if (ret) - ret = -EFAULT; - } +/** + * bio_copy_to_iter - copy all pages from bio to iov_iter + * @bio: The &struct bio which describes the I/O as source + * @iter: iov_iter as destination + * + * Copy all pages from bio to iov_iter. + * Returns 0 on success, or error on failure. + */ +static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) +{ + int i; + struct bio_vec *bvec; - bv_len -= bytes; - bv_addr += bytes; - iov_addr += bytes; - iov_off += bytes; + bio_for_each_segment_all(bvec, bio, i) { + ssize_t ret; - if (iov[iov_idx].iov_len == iov_off) { - iov_idx++; - iov_off = 0; - } - } + ret = copy_page_to_iter(bvec->bv_page, + bvec->bv_offset, + bvec->bv_len, + &iter); + + if (!iov_iter_count(&iter)) + break; - if (do_free_page) - __free_page(bvec->bv_page); + if (ret < bvec->bv_len) + return -EFAULT; } - return ret; + return 0; +} + +static void bio_free_pages(struct bio *bio) +{ + struct bio_vec *bvec; + int i; + + bio_for_each_segment_all(bvec, bio, i) + __free_page(bvec->bv_page); } /** * bio_uncopy_user - finish previously mapped bio * @bio: bio being terminated * - * Free pages allocated from bio_copy_user() and write back data + * Free pages allocated from bio_copy_user_iov() and write back data * to user space in case of a read. */ int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - struct bio_vec *bvec; - int ret = 0, i; + int ret = 0; if (!bio_flagged(bio, BIO_NULL_MAPPED)) { /* * if we're in a workqueue, the request is orphaned, so * don't copy into a random user address space, just free. */ - if (current->mm) - ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, - bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); - else if (bmd->is_our_pages) - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); + if (current->mm && bio_data_dir(bio) == READ) + ret = bio_copy_to_iter(bio, bmd->iter); + if (bmd->is_our_pages) + bio_free_pages(bio); } kfree(bmd); bio_put(bio); @@ -1132,12 +1137,10 @@ EXPORT_SYMBOL(bio_uncopy_user); /** * bio_copy_user_iov - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @iov: the iovec. - * @iov_count: number of elements in the iovec - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags + * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) + * @iter: iovec iterator + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with @@ -1145,25 +1148,25 @@ EXPORT_SYMBOL(bio_uncopy_user); */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) + const struct iov_iter *iter, + gfp_t gfp_mask) { struct bio_map_data *bmd; - struct bio_vec *bvec; struct page *page; struct bio *bio; int i, ret; int nr_pages = 0; - unsigned int len = 0; + unsigned int len = iter->count; unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0; - for (i = 0; i < iov_count; i++) { + for (i = 0; i < iter->nr_segs; i++) { unsigned long uaddr; unsigned long end; unsigned long start; - uaddr = (unsigned long)iov[i].iov_base; - end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + uaddr = (unsigned long) iter->iov[i].iov_base; + end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1) + >> PAGE_SHIFT; start = uaddr >> PAGE_SHIFT; /* @@ -1173,22 +1176,31 @@ struct bio *bio_copy_user_iov(struct request_queue *q, return ERR_PTR(-EINVAL); nr_pages += end - start; - len += iov[i].iov_len; } if (offset) nr_pages++; - bmd = bio_alloc_map_data(iov_count, gfp_mask); + bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); + /* + * We need to do a deep copy of the iov_iter including the iovecs. + * The caller provided iov might point to an on-stack or otherwise + * shortlived one. + */ + bmd->is_our_pages = map_data ? 0 : 1; + memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); + iov_iter_init(&bmd->iter, iter->type, bmd->iov, + iter->nr_segs, iter->count); + ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); if (!bio) goto out_bmd; - if (!write_to_vm) + if (iter->type & WRITE) bio->bi_rw |= REQ_WRITE; ret = 0; @@ -1236,20 +1248,18 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || + if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { - ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0); + ret = bio_copy_from_iter(bio, *iter); if (ret) goto cleanup; } - bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); + bio->bi_private = bmd; return bio; cleanup: if (!map_data) - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); - + bio_free_pages(bio); bio_put(bio); out_bmd: kfree(bmd); @@ -1257,46 +1267,30 @@ out_bmd: } /** - * bio_copy_user - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @uaddr: start of user address - * @len: length in bytes - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags + * bio_map_user_iov - map user iovec into bio + * @q: the struct request_queue for the bio + * @iter: iovec iterator + * @gfp_mask: memory allocation flags * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. + * Map the user space address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. */ -struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, - unsigned long uaddr, unsigned int len, - int write_to_vm, gfp_t gfp_mask) +struct bio *bio_map_user_iov(struct request_queue *q, + const struct iov_iter *iter, + gfp_t gfp_mask) { - struct sg_iovec iov; - - iov.iov_base = (void __user *)uaddr; - iov.iov_len = len; - - return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); -} -EXPORT_SYMBOL(bio_copy_user); - -static struct bio *__bio_map_user_iov(struct request_queue *q, - struct block_device *bdev, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) -{ - int i, j; + int j; int nr_pages = 0; struct page **pages; struct bio *bio; int cur_page = 0; int ret, offset; + struct iov_iter i; + struct iovec iov; - for (i = 0; i < iov_count; i++) { - unsigned long uaddr = (unsigned long)iov[i].iov_base; - unsigned long len = iov[i].iov_len; + iov_for_each(iov, i, *iter) { + unsigned long uaddr = (unsigned long) iov.iov_base; + unsigned long len = iov.iov_len; unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; unsigned long start = uaddr >> PAGE_SHIFT; @@ -1326,16 +1320,17 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, if (!pages) goto out; - for (i = 0; i < iov_count; i++) { - unsigned long uaddr = (unsigned long)iov[i].iov_base; - unsigned long len = iov[i].iov_len; + iov_for_each(iov, i, *iter) { + unsigned long uaddr = (unsigned long) iov.iov_base; + unsigned long len = iov.iov_len; unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; unsigned long start = uaddr >> PAGE_SHIFT; const int local_nr_pages = end - start; const int page_limit = cur_page + local_nr_pages; ret = get_user_pages_fast(uaddr, local_nr_pages, - write_to_vm, &pages[cur_page]); + (iter->type & WRITE) != WRITE, + &pages[cur_page]); if (ret < local_nr_pages) { ret = -EFAULT; goto out_unmap; @@ -1375,72 +1370,10 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, /* * set data direction, and check if mapped pages need bouncing */ - if (!write_to_vm) + if (iter->type & WRITE) bio->bi_rw |= REQ_WRITE; - bio->bi_bdev = bdev; bio->bi_flags |= (1 << BIO_USER_MAPPED); - return bio; - - out_unmap: - for (i = 0; i < nr_pages; i++) { - if(!pages[i]) - break; - page_cache_release(pages[i]); - } - out: - kfree(pages); - bio_put(bio); - return ERR_PTR(ret); -} - -/** - * bio_map_user - map user address into bio - * @q: the struct request_queue for the bio - * @bdev: destination block device - * @uaddr: start of user address - * @len: length in bytes - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, int write_to_vm, - gfp_t gfp_mask) -{ - struct sg_iovec iov; - - iov.iov_base = (void __user *)uaddr; - iov.iov_len = len; - - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); -} -EXPORT_SYMBOL(bio_map_user); - -/** - * bio_map_user_iov - map user sg_iovec table into bio - * @q: the struct request_queue for the bio - * @bdev: destination block device - * @iov: the iovec. - * @iov_count: number of elements in the iovec - * @write_to_vm: bool indicating writing to pages or not - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - const struct sg_iovec *iov, int iov_count, - int write_to_vm, gfp_t gfp_mask) -{ - struct bio *bio; - - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, - gfp_mask); - if (IS_ERR(bio)) - return bio; /* * subtle -- if __bio_map_user() ended up bouncing a bio, @@ -1449,8 +1382,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, * reference to it */ bio_get(bio); - return bio; + + out_unmap: + for (j = 0; j < nr_pages; j++) { + if (!pages[j]) + break; + page_cache_release(pages[j]); + } + out: + kfree(pages); + bio_put(bio); + return ERR_PTR(ret); } static void __bio_unmap_user(struct bio *bio) @@ -1492,8 +1435,18 @@ static void bio_map_kern_endio(struct bio *bio, int err) bio_put(bio); } -static struct bio *__bio_map_kern(struct request_queue *q, void *data, - unsigned int len, gfp_t gfp_mask) +/** + * bio_map_kern - map kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to map + * @len: length in bytes + * @gfp_mask: allocation flags for bio allocation + * + * Map the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask) { unsigned long kaddr = (unsigned long)data; unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -1517,8 +1470,11 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data, bytes = len; if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, - offset) < bytes) - break; + offset) < bytes) { + /* we don't support partial mappings */ + bio_put(bio); + return ERR_PTR(-EINVAL); + } data += bytes; len -= bytes; @@ -1528,57 +1484,26 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data, bio->bi_end_io = bio_map_kern_endio; return bio; } +EXPORT_SYMBOL(bio_map_kern); -/** - * bio_map_kern - map kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to map - * @len: length in bytes - * @gfp_mask: allocation flags for bio allocation - * - * Map the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, - gfp_t gfp_mask) +static void bio_copy_kern_endio(struct bio *bio, int err) { - struct bio *bio; - - bio = __bio_map_kern(q, data, len, gfp_mask); - if (IS_ERR(bio)) - return bio; - - if (bio->bi_iter.bi_size == len) - return bio; - - /* - * Don't support partial mappings. - */ + bio_free_pages(bio); bio_put(bio); - return ERR_PTR(-EINVAL); } -EXPORT_SYMBOL(bio_map_kern); -static void bio_copy_kern_endio(struct bio *bio, int err) +static void bio_copy_kern_endio_read(struct bio *bio, int err) { + char *p = bio->bi_private; struct bio_vec *bvec; - const int read = bio_data_dir(bio) == READ; - struct bio_map_data *bmd = bio->bi_private; int i; - char *p = bmd->sgvecs[0].iov_base; bio_for_each_segment_all(bvec, bio, i) { - char *addr = page_address(bvec->bv_page); - - if (read) - memcpy(p, addr, bvec->bv_len); - - __free_page(bvec->bv_page); + memcpy(p, page_address(bvec->bv_page), bvec->bv_len); p += bvec->bv_len; } - kfree(bmd); - bio_put(bio); + bio_copy_kern_endio(bio, err); } /** @@ -1595,28 +1520,59 @@ static void bio_copy_kern_endio(struct bio *bio, int err) struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask, int reading) { + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; struct bio *bio; - struct bio_vec *bvec; - int i; + void *p = data; + int nr_pages = 0; + + /* + * Overflow, abort + */ + if (end < start) + return ERR_PTR(-EINVAL); - bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); - if (IS_ERR(bio)) - return bio; + nr_pages = end - start; + bio = bio_kmalloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); - if (!reading) { - void *p = data; + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; - bio_for_each_segment_all(bvec, bio, i) { - char *addr = page_address(bvec->bv_page); + if (bytes > len) + bytes = len; - memcpy(addr, p, bvec->bv_len); - p += bvec->bv_len; - } + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) + goto cleanup; + + if (!reading) + memcpy(page_address(page), p, bytes); + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) + break; + + len -= bytes; + p += bytes; } - bio->bi_end_io = bio_copy_kern_endio; + if (reading) { + bio->bi_end_io = bio_copy_kern_endio_read; + bio->bi_private = data; + } else { + bio->bi_end_io = bio_copy_kern_endio; + bio->bi_rw |= REQ_WRITE; + } return bio; + +cleanup: + bio_free_pages(bio); + bio_put(bio); + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(bio_copy_kern); diff --git a/block/blk-core.c b/block/blk-core.c index 3ad405571dcc..794c3e7f01cf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -607,7 +607,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; q->backing_dev_info.state = 0; - q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; + q->backing_dev_info.capabilities = 0; q->backing_dev_info.name = "block"; q->node = node_id; @@ -2048,6 +2048,13 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) return -EIO; + if (q->mq_ops) { + if (blk_queue_io_stat(q)) + blk_account_io_start(rq, true); + blk_mq_insert_request(rq, false, true, true); + return 0; + } + spin_lock_irqsave(q->queue_lock, flags); if (unlikely(blk_queue_dying(q))) { spin_unlock_irqrestore(q->queue_lock, flags); @@ -2907,7 +2914,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; + dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); @@ -2945,8 +2952,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, if (!bs) bs = fs_bio_set; - blk_rq_init(NULL, rq); - __rq_for_each_bio(bio_src, rq_src) { bio = bio_clone_fast(bio_src, gfp_mask, bs); if (!bio) diff --git a/block/blk-lib.c b/block/blk-lib.c index 715e948f58a4..7688ee3f5d72 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -286,7 +286,6 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, * @discard: whether to discard the block range * * Description: - * Zero-fill a block range. If the discard flag is set and the block * device guarantees that subsequent READ operations to the block range * in question will return zeroes, the blocks will be discarded. Should @@ -303,26 +302,15 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard) { struct request_queue *q = bdev_get_queue(bdev); - unsigned char bdn[BDEVNAME_SIZE]; - - if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data) { - if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0)) - return 0; - - bdevname(bdev, bdn); - pr_warn("%s: DISCARD failed. Manually zeroing.\n", bdn); - } + if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data && + blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0) + return 0; - if (bdev_write_same(bdev)) { - - if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, - ZERO_PAGE(0))) - return 0; - - bdevname(bdev, bdn); - pr_warn("%s: WRITE SAME failed. Manually zeroing.\n", bdn); - } + if (bdev_write_same(bdev) && + blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, + ZERO_PAGE(0)) == 0) + return 0; return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); } diff --git a/block/blk-map.c b/block/blk-map.c index f890d4345b0c..b8d2725324a6 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> -#include <scsi/sg.h> /* for struct sg_iovec */ +#include <linux/uio.h> #include "blk.h" @@ -39,138 +39,12 @@ static int __blk_rq_unmap_user(struct bio *bio) return ret; } -static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - struct rq_map_data *map_data, void __user *ubuf, - unsigned int len, gfp_t gfp_mask) -{ - unsigned long uaddr; - struct bio *bio, *orig_bio; - int reading, ret; - - reading = rq_data_dir(rq) == READ; - - /* - * if alignment requirement is satisfied, map in user pages for - * direct dma. else, set up kernel bounce buffers - */ - uaddr = (unsigned long) ubuf; - if (blk_rq_aligned(q, uaddr, len) && !map_data) - bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); - else - bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); - - if (IS_ERR(bio)) - return PTR_ERR(bio); - - if (map_data && map_data->null_mapped) - bio->bi_flags |= (1 << BIO_NULL_MAPPED); - - orig_bio = bio; - blk_queue_bounce(q, &bio); - - /* - * We link the bounce buffer in and could have to traverse it - * later so we have to get a ref to prevent it from being freed - */ - bio_get(bio); - - ret = blk_rq_append_bio(q, rq, bio); - if (!ret) - return bio->bi_iter.bi_size; - - /* if it was boucned we must call the end io function */ - bio_endio(bio, 0); - __blk_rq_unmap_user(orig_bio); - bio_put(bio); - return ret; -} - -/** - * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage - * @q: request queue where request should be inserted - * @rq: request structure to fill - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @ubuf: the user buffer - * @len: length of user data - * @gfp_mask: memory allocation flags - * - * Description: - * Data will be mapped directly for zero copy I/O, if possible. Otherwise - * a kernel bounce buffer is used. - * - * A matching blk_rq_unmap_user() must be issued at the end of I/O, while - * still in process context. - * - * Note: The mapped bio may need to be bounced through blk_queue_bounce() - * before being submitted to the device, as pages mapped may be out of - * reach. It's the callers responsibility to make sure this happens. The - * original bio must be passed back in to blk_rq_unmap_user() for proper - * unmapping. - */ -int blk_rq_map_user(struct request_queue *q, struct request *rq, - struct rq_map_data *map_data, void __user *ubuf, - unsigned long len, gfp_t gfp_mask) -{ - unsigned long bytes_read = 0; - struct bio *bio = NULL; - int ret; - - if (len > (queue_max_hw_sectors(q) << 9)) - return -EINVAL; - if (!len) - return -EINVAL; - - if (!ubuf && (!map_data || !map_data->null_mapped)) - return -EINVAL; - - while (bytes_read != len) { - unsigned long map_len, end, start; - - map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); - end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) - >> PAGE_SHIFT; - start = (unsigned long)ubuf >> PAGE_SHIFT; - - /* - * A bad offset could cause us to require BIO_MAX_PAGES + 1 - * pages. If this happens we just lower the requested - * mapping len by a page so that we can fit - */ - if (end - start > BIO_MAX_PAGES) - map_len -= PAGE_SIZE; - - ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, - gfp_mask); - if (ret < 0) - goto unmap_rq; - if (!bio) - bio = rq->bio; - bytes_read += ret; - ubuf += ret; - - if (map_data) - map_data->offset += ret; - } - - if (!bio_flagged(bio, BIO_USER_MAPPED)) - rq->cmd_flags |= REQ_COPY_USER; - - return 0; -unmap_rq: - blk_rq_unmap_user(bio); - rq->bio = NULL; - return ret; -} -EXPORT_SYMBOL(blk_rq_map_user); - /** * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to map data to * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @iov: pointer to the iovec - * @iov_count: number of elements in the iovec - * @len: I/O byte count + * @iter: iovec iterator * @gfp_mask: memory allocation flags * * Description: @@ -187,20 +61,21 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct rq_map_data *map_data, const struct sg_iovec *iov, - int iov_count, unsigned int len, gfp_t gfp_mask) + struct rq_map_data *map_data, + const struct iov_iter *iter, gfp_t gfp_mask) { struct bio *bio; - int i, read = rq_data_dir(rq) == READ; int unaligned = 0; + struct iov_iter i; + struct iovec iov; - if (!iov || iov_count <= 0) + if (!iter || !iter->count) return -EINVAL; - for (i = 0; i < iov_count; i++) { - unsigned long uaddr = (unsigned long)iov[i].iov_base; + iov_for_each(iov, i, *iter) { + unsigned long uaddr = (unsigned long) iov.iov_base; - if (!iov[i].iov_len) + if (!iov.iov_len) return -EINVAL; /* @@ -210,16 +85,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unaligned = 1; } - if (unaligned || (q->dma_pad_mask & len) || map_data) - bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, - gfp_mask); + if (unaligned || (q->dma_pad_mask & iter->count) || map_data) + bio = bio_copy_user_iov(q, map_data, iter, gfp_mask); else - bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); + bio = bio_map_user_iov(q, iter, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); - if (bio->bi_iter.bi_size != len) { + if (map_data && map_data->null_mapped) + bio->bi_flags |= (1 << BIO_NULL_MAPPED); + + if (bio->bi_iter.bi_size != iter->count) { /* * Grab an extra reference to this bio, as bio_unmap_user() * expects to be able to drop it twice as it happens on the @@ -241,6 +118,21 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_user_iov); +int blk_rq_map_user(struct request_queue *q, struct request *rq, + struct rq_map_data *map_data, void __user *ubuf, + unsigned long len, gfp_t gfp_mask) +{ + struct iovec iov; + struct iov_iter i; + + iov.iov_base = ubuf; + iov.iov_len = len; + iov_iter_init(&i, rq_data_dir(rq), &iov, 1, len); + + return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); +} +EXPORT_SYMBOL(blk_rq_map_user); + /** * blk_rq_unmap_user - unmap a request with user data * @bio: start of bio list diff --git a/block/blk-merge.c b/block/blk-merge.c index 89b97b5e0881..fc1ff3b1ea1f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -283,35 +283,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -/** - * blk_bio_map_sg - map a bio to a scatterlist - * @q: request_queue in question - * @bio: bio being mapped - * @sglist: scatterlist being mapped - * - * Note: - * Caller must make sure sg can hold bio->bi_phys_segments entries - * - * Will return the number of sg entries setup - */ -int blk_bio_map_sg(struct request_queue *q, struct bio *bio, - struct scatterlist *sglist) -{ - struct scatterlist *sg = NULL; - int nsegs; - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - - nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); - bio->bi_next = next; - if (sg) - sg_mark_end(sg); - - BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments); - return nsegs; -} -EXPORT_SYMBOL(blk_bio_map_sg); - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) @@ -385,6 +356,14 @@ static bool req_no_special_merge(struct request *req) return !q->mq_ops && req->special; } +static int req_gap_to_prev(struct request *req, struct request *next) +{ + struct bio *prev = req->biotail; + + return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1], + next->bio->bi_io_vec[0].bv_offset); +} + static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { @@ -399,6 +378,10 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (req_no_special_merge(req) || req_no_special_merge(next)) return 0; + if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) && + req_gap_to_prev(req, next)) + return 0; + /* * Will it become too large? */ diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e3387a74a9a2..d53a764b05ea 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -524,6 +524,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); if (!bt->bs) { kfree(bt->map); + bt->map = NULL; return -ENOMEM; } diff --git a/block/blk-mq.c b/block/blk-mq.c index eb8e694fda06..4f4bea21052e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -33,6 +33,7 @@ static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); +static void blk_mq_run_queues(struct request_queue *q); /* * Check if any of the ctx's have pending work in this hardware queue @@ -117,7 +118,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q) if (freeze) { percpu_ref_kill(&q->mq_usage_counter); - blk_mq_run_queues(q, false); + blk_mq_run_queues(q); } } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); @@ -161,6 +162,13 @@ void blk_mq_wake_waiters(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_wakeup_all(hctx->tags, true); + + /* + * If we are called because the queue has now been marked as + * dying, we need to ensure that processes currently waiting on + * the queue are notified as well. + */ + wake_up_all(&q->mq_freeze_wq); } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -398,6 +406,12 @@ void blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); +int blk_mq_request_started(struct request *rq) +{ + return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags); +} +EXPORT_SYMBOL_GPL(blk_mq_request_started); + void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; @@ -515,12 +529,38 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) } EXPORT_SYMBOL(blk_mq_add_to_requeue_list); +void blk_mq_cancel_requeue_work(struct request_queue *q) +{ + cancel_work_sync(&q->requeue_work); +} +EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work); + void blk_mq_kick_requeue_list(struct request_queue *q) { kblockd_schedule_work(&q->requeue_work); } EXPORT_SYMBOL(blk_mq_kick_requeue_list); +void blk_mq_abort_requeue_list(struct request_queue *q) +{ + unsigned long flags; + LIST_HEAD(rq_list); + + spin_lock_irqsave(&q->requeue_lock, flags); + list_splice_init(&q->requeue_list, &rq_list); + spin_unlock_irqrestore(&q->requeue_lock, flags); + + while (!list_empty(&rq_list)) { + struct request *rq; + + rq = list_first_entry(&rq_list, struct request, queuelist); + list_del_init(&rq->queuelist); + rq->errors = -EIO; + blk_mq_end_request(rq, rq->errors); + } +} +EXPORT_SYMBOL(blk_mq_abort_requeue_list); + static inline bool is_flush_request(struct request *rq, struct blk_flush_queue *fq, unsigned int tag) { @@ -581,13 +621,24 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved) break; } } - + static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { + /* + * If a request wasn't started before the queue was + * marked dying, kill it here or it'll go unnoticed. + */ + if (unlikely(blk_queue_dying(rq->q))) { + rq->errors = -EIO; + blk_mq_complete_request(rq); + } + return; + } + if (rq->cmd_flags & REQ_NO_TIMEOUT) return; if (time_after_eq(jiffies, rq->deadline)) { @@ -853,7 +904,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) &hctx->run_work, 0); } -void blk_mq_run_queues(struct request_queue *q, bool async) +static void blk_mq_run_queues(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; @@ -864,10 +915,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async) test_bit(BLK_MQ_S_STOPPED, &hctx->state)) continue; - blk_mq_run_hw_queue(hctx, async); + blk_mq_run_hw_queue(hctx, false); } } -EXPORT_SYMBOL(blk_mq_run_queues); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { @@ -905,7 +955,6 @@ void blk_mq_start_hw_queues(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_start_hw_queues); - void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; @@ -1593,10 +1642,8 @@ static void blk_mq_free_hw_queues(struct request_queue *q, struct blk_mq_hw_ctx *hctx; unsigned int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) free_cpumask_var(hctx->cpumask); - kfree(hctx); - } } static int blk_mq_init_hctx(struct request_queue *q, @@ -1617,7 +1664,6 @@ static int blk_mq_init_hctx(struct request_queue *q, hctx->queue = q; hctx->queue_num = hctx_idx; hctx->flags = set->flags; - hctx->cmd_size = set->cmd_size; blk_mq_init_cpu_notifier(&hctx->cpu_notifier, blk_mq_hctx_notify, hctx); @@ -1822,6 +1868,27 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, mutex_unlock(&set->tag_list_lock); } +/* + * It is the actual release handler for mq, but we do it from + * request queue's release handler for avoiding use-after-free + * and headache because q->mq_kobj shouldn't have been introduced, + * but we can't group ctx/kctx kobj without it. + */ +void blk_mq_release(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + /* hctx kobj stays in hctx */ + queue_for_each_hw_ctx(q, hctx, i) + kfree(hctx); + + kfree(q->queue_hw_ctx); + + /* ctx kobj stays in queue_ctx */ + free_percpu(q->queue_ctx); +} + struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct blk_mq_hw_ctx **hctxs; @@ -1955,12 +2022,8 @@ void blk_mq_free_queue(struct request_queue *q) percpu_ref_exit(&q->mq_usage_counter); - free_percpu(q->queue_ctx); - kfree(q->queue_hw_ctx); kfree(q->mq_map); - q->queue_ctx = NULL; - q->queue_hw_ctx = NULL; q->mq_map = NULL; mutex_lock(&all_q_mutex); diff --git a/block/blk-mq.h b/block/blk-mq.h index 4f4f943c22c3..6a48c4c0d8a2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -62,6 +62,8 @@ extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_rq_timed_out(struct request *req, bool reserved); +void blk_mq_release(struct request_queue *q); + /* * Basic implementation of sparser bitmap, allowing the user to spread * the bits over more cachelines. diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 935ea2aa0730..faaf36ade7eb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -517,6 +517,8 @@ static void blk_release_queue(struct kobject *kobj) if (!q->mq_ops) blk_free_flush_queue(q->fq); + else + blk_mq_release(q); blk_trace_shutdown(q); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 56c025894cdf..246dfb16c3d9 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -190,6 +190,9 @@ void blk_add_timer(struct request *req) struct request_queue *q = req->q; unsigned long expiry; + if (req->cmd_flags & REQ_NO_TIMEOUT) + return; + /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ if (!q->mq_ops && !q->rq_timed_out_fn) return; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b9abdca84c17..5da8e6e9ab4b 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3590,6 +3590,11 @@ retry: blkcg = bio_blkcg(bio); cfqg = cfq_lookup_create_cfqg(cfqd, blkcg); + if (!cfqg) { + cfqq = &cfqd->oom_cfqq; + goto out; + } + cfqq = cic_to_cfqq(cic, is_sync); /* @@ -3626,7 +3631,7 @@ retry: } else cfqq = &cfqd->oom_cfqq; } - +out: if (new_cfqq) kmem_cache_free(cfq_pool, new_cfqq); diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 56d08fd75b1a..26cb624ace05 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -715,7 +715,7 @@ int efi_partition(struct parsed_partitions *state) state->parts[i + 1].flags = ADDPART_FLAG_RAID; info = &state->parts[i + 1].info; - efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); + efi_guid_to_str(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ label_max = min(ARRAY_SIZE(info->volname) - 1, diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 28163fad3c5d..e1f71c396193 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -332,7 +332,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, ret = 0; if (hdr->iovec_count) { - size_t iov_data_len; + struct iov_iter i; struct iovec *iov = NULL; ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, @@ -342,20 +342,11 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, goto out_free_cdb; } - iov_data_len = ret; - ret = 0; - /* SG_IO howto says that the shorter of the two wins */ - if (hdr->dxfer_len < iov_data_len) { - hdr->iovec_count = iov_shorten(iov, - hdr->iovec_count, - hdr->dxfer_len); - iov_data_len = hdr->dxfer_len; - } + iov_iter_init(&i, rq_data_dir(rq), iov, hdr->iovec_count, + min_t(unsigned, ret, hdr->dxfer_len)); - ret = blk_rq_map_user_iov(q, rq, NULL, (struct sg_iovec *) iov, - hdr->iovec_count, - iov_data_len, GFP_KERNEL); + ret = blk_rq_map_user_iov(q, rq, NULL, &i, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, |