author     Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>  2021-10-01 21:38:40 +0300
committer  Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>  2021-10-01 21:40:21 +0300
commit     9c881021a269af242594e2dfc79f1c4701404887 (patch)
tree       c8ec14f412d7ea35009b2dee08770082ddbb5c6e /block
parent     e9479d98b87227b8b7502c4c1e778887b23799f1 (diff)
parent     cf06e1ab1c3ed354da5873e646f2164fea147c88 (diff)
Merge branch 'dev-5.10' into dev-5.10-intel
Pull 5.10.67 stable from OpenBMC upstream.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
Diffstat (limited to 'block')
-rw-r--r--  block/bfq-iosched.c  |  5
-rw-r--r--  block/bio.c          | 15
-rw-r--r--  block/blk-core.c     |  1
-rw-r--r--  block/blk-crypto.c   |  2
-rw-r--r--  block/blk-flush.c    | 13
-rw-r--r--  block/blk-iocost.c   |  8
-rw-r--r--  block/blk-merge.c    | 18
-rw-r--r--  block/blk-mq.c       | 67
-rw-r--r--  block/blk-throttle.c | 32
-rw-r--r--  block/blk-zoned.c    |  6
-rw-r--r--  block/blk.h          |  8
-rw-r--r--  block/bsg.c          |  5
-rw-r--r--  block/elevator.c     |  3
-rw-r--r--  block/mq-deadline.c  |  2
14 files changed, 121 insertions(+), 64 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c91dca641eb4..b8c2ddc01aec 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2251,6 +2251,9 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
__rq = bfq_find_rq_fmerge(bfqd, bio, q);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
@@ -5008,7 +5011,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
bfqq->new_ioprio);
- bfqq->new_ioprio = IOPRIO_BE_NR;
+ bfqq->new_ioprio = IOPRIO_BE_NR - 1;
}
bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
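The new ELEVATOR_DISCARD_MERGE return relies on blk_discard_mergable(), whose private copy this series deletes from blk-merge.c (see that diff below); the upstream patch relocates it to a shared header so bfq, mq-deadline and the generic elevator all use one definition. For reference, the helper as it existed in blk-merge.c:

static inline bool blk_discard_mergable(struct request *req)
{
	if (req_op(req) == REQ_OP_DISCARD &&
	    queue_max_discard_segments(req->q) > 1)
		return true;
	return false;
}

A discard request on a queue that accepts more than one discard segment may merge with non-contiguous ranges, so the elevators short-circuit to ELEVATOR_DISCARD_MERGE before the contiguity-based front/back merge checks.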
diff --git a/block/bio.c b/block/bio.c
index 9c931df2d986..0703a208ca24 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -978,6 +978,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+static void bio_put_pages(struct page **pages, size_t size, size_t off)
+{
+ size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
+
+ for (i = 0; i < nr; i++)
+ put_page(pages[i]);
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -1022,8 +1030,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
if (same_page)
put_page(page);
} else {
- if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
+ if (WARN_ON_ONCE(bio_full(bio, len))) {
+ bio_put_pages(pages + i, left, offset);
+ return -EINVAL;
+ }
__bio_add_page(bio, page, len, offset);
}
offset = 0;
@@ -1068,6 +1078,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_add_hw_page(q, bio, page, len, offset,
max_append_sectors, &same_page) != len) {
+ bio_put_pages(pages + i, left, offset);
ret = -EINVAL;
break;
}
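bio_put_pages() exists because __bio_iov_iter_get_pages() pins a batch of pages up front: on a mid-loop failure, the pages that were pinned but never added to the bio must still be released. The count comes from the remaining byte count plus the intra-page offset of the first unconsumed page. A minimal userspace sketch of that arithmetic (illustrative only: 4 KiB pages assumed, kernel macros redefined locally, pages_spanned() is not a kernel name):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Pages spanned by `size` bytes starting `off` bytes into the first page
 * -- the same expression bio_put_pages() uses to bound its put_page() loop. */
static size_t pages_spanned(size_t size, size_t off)
{
	return DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
}

int main(void)
{
	/* 5000 bytes starting 3000 bytes into page 0 end at byte 8000,
	 * touching pages 0 and 1. */
	printf("%zu\n", pages_spanned(5000, 3000));	/* 2 */
	/* A page-aligned, page-sized buffer touches exactly one page. */
	printf("%zu\n", pages_spanned(4096, 0));	/* 1 */
	return 0;
}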
diff --git a/block/blk-core.c b/block/blk-core.c
index 2d53e2ff48ff..fbc39756f37d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -121,7 +121,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->internal_tag = BLK_MQ_NO_TAG;
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
- refcount_set(&rq->ref, 1);
blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 5da43f0973b4..5ffa9aab49de 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -332,7 +332,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key,
if (mode->keysize == 0)
return -EINVAL;
- if (dun_bytes == 0 || dun_bytes > BLK_CRYPTO_MAX_IV_SIZE)
+ if (dun_bytes == 0 || dun_bytes > mode->ivsize)
return -EINVAL;
if (!is_power_of_2(data_unit_size))
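The tightened check rejects a data-unit-number size that fits under the global BLK_CRYPTO_MAX_IV_SIZE ceiling but overflows the IV of the mode actually selected. A userspace sketch of the corrected validation (illustrative: mode_desc and dun_bytes_ok() are stand-in names, and the AES-256-XTS sizes are quoted from blk-crypto's mode table as an assumption):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct blk_crypto_mode: each mode carries its
 * own key and IV sizes. */
struct mode_desc {
	unsigned int keysize;
	unsigned int ivsize;
};

/* The fixed check: the DUN must fit the mode's own IV, not merely the
 * global BLK_CRYPTO_MAX_IV_SIZE ceiling the old code compared against. */
static bool dun_bytes_ok(const struct mode_desc *mode, unsigned int dun_bytes)
{
	return dun_bytes != 0 && dun_bytes <= mode->ivsize;
}

int main(void)
{
	const struct mode_desc xts = { .keysize = 64, .ivsize = 16 };

	printf("%d\n", dun_bytes_ok(&xts, 16));	/* 1: fits the 16-byte IV */
	printf("%d\n", dun_bytes_ok(&xts, 32));	/* 0: rejected after the fix */
	return 0;
}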
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 7ee7e5e8905d..70f1d02135ed 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -263,6 +263,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}
+bool is_flush_rq(struct request *rq)
+{
+ return rq->end_io == flush_end_io;
+}
+
/**
* blk_kick_flush - consider issuing flush request
* @q: request_queue being kicked
@@ -330,6 +335,14 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->rq_flags |= RQF_FLUSH_SEQ;
flush_rq->rq_disk = first_rq->rq_disk;
flush_rq->end_io = flush_end_io;
+ /*
+ * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
+ * implied in refcount_inc_not_zero() called from
+ * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref
+ * and READ flush_rq->end_io
+ */
+ smp_wmb();
+ refcount_set(&flush_rq->ref, 1);
blk_flush_queue_rq(flush_rq, false);
}
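The new barrier pairs the publication of ->end_io with the ref becoming non-zero, so a timeout-side iterator that wins refcount_inc_not_zero() is guaranteed to read the flush completion handler rather than a stale pointer. A C11-atomics model of that pairing (illustrative only: release/acquire here stands in for smp_wmb() and the kernel refcount primitives, and none of these names are the kernel's):

#include <stdatomic.h>
#include <stdio.h>

struct req {
	void (*end_io)(struct req *rq);
	atomic_uint ref;
};

static void flush_end_io(struct req *rq) { (void)rq; }

/* Writer (blk_kick_flush): publish ->end_io, then make the ref visible.
 * The release store stands in for smp_wmb() + refcount_set(). */
static void publish(struct req *rq)
{
	rq->end_io = flush_end_io;
	atomic_store_explicit(&rq->ref, 1, memory_order_release);
}

/* Reader (blk_mq_find_and_get_req): an acquire CAS that observes ref != 0
 * is guaranteed to also observe the ->end_io store above. */
static int get_ref(struct req *rq)
{
	unsigned int old = atomic_load_explicit(&rq->ref, memory_order_acquire);

	while (old != 0)
		if (atomic_compare_exchange_weak_explicit(&rq->ref, &old,
							  old + 1,
							  memory_order_acquire,
							  memory_order_acquire))
			return 1;
	return 0;	/* request already freed; do not touch it */
}

int main(void)
{
	struct req rq = { .ref = 0 };

	publish(&rq);
	printf("got ref: %d\n", get_ref(&rq));	/* 1 */
	return 0;
}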
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index b7d8a954d99c..e95b93f72bd5 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3039,19 +3039,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
return -EINVAL;
- spin_lock(&blkcg->lock);
+ spin_lock_irq(&blkcg->lock);
iocc->dfl_weight = v * WEIGHT_ONE;
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
struct ioc_gq *iocg = blkg_to_iocg(blkg);
if (iocg) {
- spin_lock_irq(&iocg->ioc->lock);
+ spin_lock(&iocg->ioc->lock);
ioc_now(iocg->ioc, &now);
weight_updated(iocg, &now);
- spin_unlock_irq(&iocg->ioc->lock);
+ spin_unlock(&iocg->ioc->lock);
}
}
- spin_unlock(&blkcg->lock);
+ spin_unlock_irq(&blkcg->lock);
return nbytes;
}
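The swap matters because blkcg->lock can also be taken from irq context, so it must be held with interrupts off; and once the outer acquisition disables irqs, the nested ioc->lock only needs a plain spin_lock, since an inner spin_unlock_irq() would turn interrupts back on while blkcg->lock was still held. A toy model of that hazard (userspace sketch; lock bodies elided, only the irq-enable flag is tracked):

#include <stdio.h>
#include <stdbool.h>

/* Toy model: track only the CPU's irq-enable flag. What matters is which
 * acquire/release variant toggles it. */
static bool irqs_enabled = true;

static void spin_lock_irq_(void)   { irqs_enabled = false; }
static void spin_unlock_irq_(void) { irqs_enabled = true; }
static void spin_lock_(void)       { }
static void spin_unlock_(void)     { }

int main(void)
{
	/* Before the fix: blkcg->lock taken with irqs on, and the nested
	 * unlock re-enables irqs while blkcg->lock is still held. */
	spin_lock_();			/* blkcg->lock */
	spin_lock_irq_();		/* iocg->ioc->lock */
	spin_unlock_irq_();		/* irqs on again, blkcg->lock held! */
	printf("buggy: irqs on under blkcg->lock: %d\n", irqs_enabled);
	spin_unlock_();

	/* After the fix: only the outermost lock toggles the irq state. */
	spin_lock_irq_();		/* blkcg->lock, irqs off for the span */
	spin_lock_();			/* iocg->ioc->lock: plain lock suffices */
	spin_unlock_();
	printf("fixed: irqs off while nested: %d\n", !irqs_enabled);
	spin_unlock_irq_();
	return 0;
}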
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 349cd7d3af81..26f4bcc10de9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -341,6 +341,8 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
*bio = split;
+
+ blk_throtl_charge_bio_split(*bio);
}
}
@@ -700,22 +702,6 @@ static void blk_account_io_merge_request(struct request *req)
}
}
-/*
- * Two cases of handling DISCARD merge:
- * If max_discard_segments > 1, the driver takes every bio
- * as a range and send them to controller together. The ranges
- * needn't to be contiguous.
- * Otherwise, the bios/requests will be handled as same as
- * others which should be contiguous.
- */
-static inline bool blk_discard_mergable(struct request *req)
-{
- if (req_op(req) == REQ_OP_DISCARD &&
- queue_max_discard_segments(req->q) > 1)
- return true;
- return false;
-}
-
static enum elv_merge blk_try_req_merge(struct request *req,
struct request *next)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a368eb6dc647..9e3fedbaa644 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -929,7 +929,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
void blk_mq_put_rq_ref(struct request *rq)
{
- if (is_flush_rq(rq, rq->mq_hctx))
+ if (is_flush_rq(rq))
rq->end_io(rq, 0);
else if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
@@ -941,34 +941,14 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
unsigned long *next = priv;
/*
- * Just do a quick check if it is expired before locking the request in
- * so we're not unnecessarilly synchronizing across CPUs.
- */
- if (!blk_mq_req_expired(rq, next))
- return true;
-
- /*
- * We have reason to believe the request may be expired. Take a
- * reference on the request to lock this request lifetime into its
- * currently allocated context to prevent it from being reallocated in
- * the event the completion by-passes this timeout handler.
- *
- * If the reference was already released, then the driver beat the
- * timeout handler to posting a natural completion.
- */
- if (!refcount_inc_not_zero(&rq->ref))
- return true;
-
- /*
- * The request is now locked and cannot be reallocated underneath the
- * timeout handler's processing. Re-verify this exact request is truly
- * expired; if it is not expired, then the request was completed and
- * reallocated as a new request.
+ * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
+ * be reallocated underneath the timeout handler's processing, then
+ * the expire check is reliable. If the request is not expired, then
+ * it was completed and reallocated as a new request after returning
+ * from blk_mq_check_expired().
*/
if (blk_mq_req_expired(rq, next))
blk_mq_rq_timed_out(rq, reserved);
-
- blk_mq_put_rq_ref(rq);
return true;
}
@@ -2609,16 +2589,49 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
&hctx->cpuhp_dead);
}
+/*
+ * Before freeing hw queue, clearing the flush request reference in
+ * tags->rqs[] for avoiding potential UAF.
+ */
+static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
+ unsigned int queue_depth, struct request *flush_rq)
+{
+ int i;
+ unsigned long flags;
+
+ /* The hw queue may not be mapped yet */
+ if (!tags)
+ return;
+
+ WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+
+ for (i = 0; i < queue_depth; i++)
+ cmpxchg(&tags->rqs[i], flush_rq, NULL);
+
+ /*
+ * Wait until all pending iteration is done.
+ *
+ * Request reference is cleared and it is guaranteed to be observed
+ * after the ->lock is released.
+ */
+ spin_lock_irqsave(&tags->lock, flags);
+ spin_unlock_irqrestore(&tags->lock, flags);
+}
+
/* hctx->ctxs will be freed in queue's release handler */
static void blk_mq_exit_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
+ struct request *flush_rq = hctx->fq->flush_rq;
+
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_idle(hctx);
+ blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+ set->queue_depth, flush_rq);
if (set->ops->exit_request)
- set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
+ set->ops->exit_request(set, flush_rq, hctx_idx);
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
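blk_mq_clear_flush_rq_mapping() uses cmpxchg() rather than a plain store because a tags->rqs[] slot may already have been recycled to a live request; only slots still pointing at the stale flush request may be cleared. A userspace model of the sweep (illustrative: QUEUE_DEPTH and the names are invented, and the tags->lock acquire/release barrier is only noted in a comment):

#include <stdatomic.h>
#include <stdio.h>

#define QUEUE_DEPTH 4

/* Toy tags->rqs[]: some slots may still reference the stale flush request,
 * others may already hold live requests that must not be disturbed. */
static _Atomic(void *) rqs[QUEUE_DEPTH];

static void clear_flush_rq_mapping(void *flush_rq)
{
	for (int i = 0; i < QUEUE_DEPTH; i++) {
		void *expected = flush_rq;

		/* Clear only slots still equal to flush_rq -- the reason
		 * the kernel uses cmpxchg() instead of a plain store. */
		atomic_compare_exchange_strong(&rqs[i], &expected, NULL);
	}
	/* The real function then acquires and releases tags->lock once,
	 * which waits out any iterator that sampled the old pointer. */
}

int main(void)
{
	int flush_rq, live_rq;

	rqs[0] = &flush_rq;	/* stale flush request reference */
	rqs[2] = &live_rq;	/* slot reused by a live request */
	clear_flush_rq_mapping(&flush_rq);
	printf("slot0=%p slot2=%p\n",
	       atomic_load(&rqs[0]), atomic_load(&rqs[2]));
	/* slot0 is now NULL; slot2 still points at live_rq. */
	return 0;
}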
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b771c4299982..63e9d00a0832 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -178,6 +178,9 @@ struct throtl_grp {
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
+ atomic_t io_split_cnt[2];
+ atomic_t last_io_split_cnt[2];
+
struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};
@@ -771,6 +774,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
/*
* Previous slice has expired. We must have trimmed it after last
* bio dispatch. That means since start of last slice, we never used
@@ -793,6 +798,9 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+
+ atomic_set(&tg->io_split_cnt[rw], 0);
+
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -1025,6 +1033,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
+ if (iops_limit != UINT_MAX)
+ tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
+
if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
@@ -2046,12 +2057,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
}
if (tg->iops[READ][LIMIT_LOW]) {
+ tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
iops = tg->last_io_disp[READ] * HZ / elapsed_time;
if (iops >= tg->iops[READ][LIMIT_LOW])
tg->last_low_overflow_time[READ] = now;
}
if (tg->iops[WRITE][LIMIT_LOW]) {
+ tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
if (iops >= tg->iops[WRITE][LIMIT_LOW])
tg->last_low_overflow_time[WRITE] = now;
@@ -2170,6 +2183,25 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
+void blk_throtl_charge_bio_split(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+ struct throtl_grp *parent = blkg_to_tg(blkg);
+ struct throtl_service_queue *parent_sq;
+ bool rw = bio_data_dir(bio);
+
+ do {
+ if (!parent->has_rules[rw])
+ break;
+
+ atomic_inc(&parent->io_split_cnt[rw]);
+ atomic_inc(&parent->last_io_split_cnt[rw]);
+
+ parent_sq = parent->service_queue.parent_sq;
+ parent = sq_to_tg(parent_sq);
+ } while (parent);
+}
+
bool blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bio->bi_disk->queue;
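The split accounting is a side counter: __blk_queue_split() calls blk_throtl_charge_bio_split(), which walks from the bio's group up through every ancestor with rules, bumping io_split_cnt/last_io_split_cnt, while tg_may_dispatch() and throtl_downgrade_check() later fold the pending count into the dispatch totals; atomic_xchg() guarantees each split is charged exactly once. A compact userspace model of the fold (illustrative: one group, one direction, invented function names):

#include <stdatomic.h>
#include <stdio.h>

/* Toy model of one throtl_grp, one direction: splits accumulate on a side
 * atomic and are folded into the dispatch count exactly once. */
static atomic_uint io_split_cnt;
static unsigned int io_disp;

/* __blk_queue_split() -> blk_throtl_charge_bio_split(): count the split. */
static void charge_bio_split(void)
{
	atomic_fetch_add(&io_split_cnt, 1);
}

/* tg_may_dispatch(): drain pending split charges into the IOPS budget.
 * Exchange-with-zero consumes the count so it is never charged twice. */
static void fold_splits(void)
{
	io_disp += atomic_exchange(&io_split_cnt, 0);
}

int main(void)
{
	charge_bio_split();
	charge_bio_split();
	fold_splits();
	fold_splits();	/* already drained: adds nothing the second time */
	printf("io_disp=%u pending=%u\n", io_disp,
	       atomic_load(&io_split_cnt));	/* io_disp=2 pending=0 */
	return 0;
}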
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index ab7d7ebcf6dd..61b452272f94 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -296,9 +296,6 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
return -EFAULT;
@@ -357,9 +354,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
if (!blk_queue_is_zoned(q))
return -ENOTTY;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
if (!(mode & FMODE_WRITE))
return -EBADF;
diff --git a/block/blk.h b/block/blk.h
index dfab98465db9..f84c83300f6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -44,11 +44,7 @@ static inline void __blk_get_queue(struct request_queue *q)
kobject_get(&q->kobj);
}
-static inline bool
-is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
-{
- return hctx->fq->flush_rq == req;
-}
+bool is_flush_rq(struct request *req);
struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
gfp_t flags);
@@ -303,11 +299,13 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern void blk_throtl_register_queue(struct request_queue *q);
+extern void blk_throtl_charge_bio_split(struct bio *bio);
bool blk_throtl_bio(struct bio *bio);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
+static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
diff --git a/block/bsg.c b/block/bsg.c
index 3d78e843a83f..2cbc1fcc8247 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -371,10 +371,13 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case SG_GET_RESERVED_SIZE:
case SG_SET_RESERVED_SIZE:
case SG_EMULATED_HOST:
- case SCSI_IOCTL_SEND_COMMAND:
return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg);
case SG_IO:
return bsg_sg_io(bd->queue, file->f_mode, uarg);
+ case SCSI_IOCTL_SEND_COMMAND:
+ pr_warn_ratelimited("%s: calling unsupported SCSI_IOCTL_SEND_COMMAND\n",
+ current->comm);
+ return -EINVAL;
default:
return -ENOTTY;
}
diff --git a/block/elevator.c b/block/elevator.c
index 293c5c81397a..2a525863d4e9 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -336,6 +336,9 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
+
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_BACK_MERGE;
}
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 2b9635d0dcba..e4e90761eab3 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -454,6 +454,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
if (elv_bio_merge_ok(__rq, bio)) {
*rq = __rq;
+ if (blk_discard_mergable(__rq))
+ return ELEVATOR_DISCARD_MERGE;
return ELEVATOR_FRONT_MERGE;
}
}