diff options
author | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:38:40 +0300 |
---|---|---|
committer | Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> | 2021-10-01 21:40:21 +0300 |
commit | 9c881021a269af242594e2dfc79f1c4701404887 (patch) | |
tree | c8ec14f412d7ea35009b2dee08770082ddbb5c6e /block | |
parent | e9479d98b87227b8b7502c4c1e778887b23799f1 (diff) | |
parent | cf06e1ab1c3ed354da5873e646f2164fea147c88 (diff) | |
download | linux-dev-5.10-intel.tar.xz |
Merge branch 'dev-5.10' into dev-5.10-inteldev-5.10-intel
Pull 5.10.67 stable from OpenBMC upstream.
Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/bfq-iosched.c | 5 | ||||
-rw-r--r-- | block/bio.c | 15 | ||||
-rw-r--r-- | block/blk-core.c | 1 | ||||
-rw-r--r-- | block/blk-crypto.c | 2 | ||||
-rw-r--r-- | block/blk-flush.c | 13 | ||||
-rw-r--r-- | block/blk-iocost.c | 8 | ||||
-rw-r--r-- | block/blk-merge.c | 18 | ||||
-rw-r--r-- | block/blk-mq.c | 67 | ||||
-rw-r--r-- | block/blk-throttle.c | 32 | ||||
-rw-r--r-- | block/blk-zoned.c | 6 | ||||
-rw-r--r-- | block/blk.h | 8 | ||||
-rw-r--r-- | block/bsg.c | 5 | ||||
-rw-r--r-- | block/elevator.c | 3 | ||||
-rw-r--r-- | block/mq-deadline.c | 2 |
14 files changed, 121 insertions, 64 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index c91dca641eb4..b8c2ddc01aec 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2251,6 +2251,9 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, __rq = bfq_find_rq_fmerge(bfqd, bio, q); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_FRONT_MERGE; } @@ -5008,7 +5011,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) if (bfqq->new_ioprio >= IOPRIO_BE_NR) { pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", bfqq->new_ioprio); - bfqq->new_ioprio = IOPRIO_BE_NR; + bfqq->new_ioprio = IOPRIO_BE_NR - 1; } bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); diff --git a/block/bio.c b/block/bio.c index 9c931df2d986..0703a208ca24 100644 --- a/block/bio.c +++ b/block/bio.c @@ -978,6 +978,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) return 0; } +static void bio_put_pages(struct page **pages, size_t size, size_t off) +{ + size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE); + + for (i = 0; i < nr; i++) + put_page(pages[i]); +} + #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) /** @@ -1022,8 +1030,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) if (same_page) put_page(page); } else { - if (WARN_ON_ONCE(bio_full(bio, len))) - return -EINVAL; + if (WARN_ON_ONCE(bio_full(bio, len))) { + bio_put_pages(pages + i, left, offset); + return -EINVAL; + } __bio_add_page(bio, page, len, offset); } offset = 0; @@ -1068,6 +1078,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) len = min_t(size_t, PAGE_SIZE - offset, left); if (bio_add_hw_page(q, bio, page, len, offset, max_append_sectors, &same_page) != len) { + bio_put_pages(pages + i, left, offset); ret = -EINVAL; break; } diff --git a/block/blk-core.c b/block/blk-core.c index 2d53e2ff48ff..fbc39756f37d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -121,7 +121,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->internal_tag = BLK_MQ_NO_TAG; rq->start_time_ns = ktime_get_ns(); rq->part = NULL; - refcount_set(&rq->ref, 1); blk_crypto_rq_set_defaults(rq); } EXPORT_SYMBOL(blk_rq_init); diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 5da43f0973b4..5ffa9aab49de 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -332,7 +332,7 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, if (mode->keysize == 0) return -EINVAL; - if (dun_bytes == 0 || dun_bytes > BLK_CRYPTO_MAX_IV_SIZE) + if (dun_bytes == 0 || dun_bytes > mode->ivsize) return -EINVAL; if (!is_power_of_2(data_unit_size)) diff --git a/block/blk-flush.c b/block/blk-flush.c index 7ee7e5e8905d..70f1d02135ed 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -263,6 +263,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) spin_unlock_irqrestore(&fq->mq_flush_lock, flags); } +bool is_flush_rq(struct request *rq) +{ + return rq->end_io == flush_end_io; +} + /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked @@ -330,6 +335,14 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; + /* + * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one + * implied in refcount_inc_not_zero() called from + * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref + * and READ flush_rq->end_io + */ + smp_wmb(); + refcount_set(&flush_rq->ref, 1); blk_flush_queue_rq(flush_rq, false); } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index b7d8a954d99c..e95b93f72bd5 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3039,19 +3039,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; - spin_lock(&blkcg->lock); + spin_lock_irq(&blkcg->lock); iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { - spin_lock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); - spin_unlock_irq(&iocg->ioc->lock); + spin_unlock(&iocg->ioc->lock); } } - spin_unlock(&blkcg->lock); + spin_unlock_irq(&blkcg->lock); return nbytes; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 349cd7d3af81..26f4bcc10de9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -341,6 +341,8 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) trace_block_split(q, split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; + + blk_throtl_charge_bio_split(*bio); } } @@ -700,22 +702,6 @@ static void blk_account_io_merge_request(struct request *req) } } -/* - * Two cases of handling DISCARD merge: - * If max_discard_segments > 1, the driver takes every bio - * as a range and send them to controller together. The ranges - * needn't to be contiguous. - * Otherwise, the bios/requests will be handled as same as - * others which should be contiguous. - */ -static inline bool blk_discard_mergable(struct request *req) -{ - if (req_op(req) == REQ_OP_DISCARD && - queue_max_discard_segments(req->q) > 1) - return true; - return false; -} - static enum elv_merge blk_try_req_merge(struct request *req, struct request *next) { diff --git a/block/blk-mq.c b/block/blk-mq.c index a368eb6dc647..9e3fedbaa644 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -929,7 +929,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) void blk_mq_put_rq_ref(struct request *rq) { - if (is_flush_rq(rq, rq->mq_hctx)) + if (is_flush_rq(rq)) rq->end_io(rq, 0); else if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); @@ -941,34 +941,14 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, unsigned long *next = priv; /* - * Just do a quick check if it is expired before locking the request in - * so we're not unnecessarilly synchronizing across CPUs. - */ - if (!blk_mq_req_expired(rq, next)) - return true; - - /* - * We have reason to believe the request may be expired. Take a - * reference on the request to lock this request lifetime into its - * currently allocated context to prevent it from being reallocated in - * the event the completion by-passes this timeout handler. - * - * If the reference was already released, then the driver beat the - * timeout handler to posting a natural completion. - */ - if (!refcount_inc_not_zero(&rq->ref)) - return true; - - /* - * The request is now locked and cannot be reallocated underneath the - * timeout handler's processing. Re-verify this exact request is truly - * expired; if it is not expired, then the request was completed and - * reallocated as a new request. + * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot + * be reallocated underneath the timeout handler's processing, then + * the expire check is reliable. If the request is not expired, then + * it was completed and reallocated as a new request after returning + * from blk_mq_check_expired(). */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - - blk_mq_put_rq_ref(rq); return true; } @@ -2609,16 +2589,49 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) &hctx->cpuhp_dead); } +/* + * Before freeing hw queue, clearing the flush request reference in + * tags->rqs[] for avoiding potential UAF. + */ +static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags, + unsigned int queue_depth, struct request *flush_rq) +{ + int i; + unsigned long flags; + + /* The hw queue may not be mapped yet */ + if (!tags) + return; + + WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0); + + for (i = 0; i < queue_depth; i++) + cmpxchg(&tags->rqs[i], flush_rq, NULL); + + /* + * Wait until all pending iteration is done. + * + * Request reference is cleared and it is guaranteed to be observed + * after the ->lock is released. + */ + spin_lock_irqsave(&tags->lock, flags); + spin_unlock_irqrestore(&tags->lock, flags); +} + /* hctx->ctxs will be freed in queue's release handler */ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + struct request *flush_rq = hctx->fq->flush_rq; + if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_idle(hctx); + blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx], + set->queue_depth, flush_rq); if (set->ops->exit_request) - set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); + set->ops->exit_request(set, flush_rq, hctx_idx); if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b771c4299982..63e9d00a0832 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -178,6 +178,9 @@ struct throtl_grp { unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ unsigned long bio_cnt_reset_time; + atomic_t io_split_cnt[2]; + atomic_t last_io_split_cnt[2]; + struct blkg_rwstat stat_bytes; struct blkg_rwstat stat_ios; }; @@ -771,6 +774,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg, tg->bytes_disp[rw] = 0; tg->io_disp[rw] = 0; + atomic_set(&tg->io_split_cnt[rw], 0); + /* * Previous slice has expired. We must have trimmed it after last * bio dispatch. That means since start of last slice, we never used @@ -793,6 +798,9 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw) tg->io_disp[rw] = 0; tg->slice_start[rw] = jiffies; tg->slice_end[rw] = jiffies + tg->td->throtl_slice; + + atomic_set(&tg->io_split_cnt[rw], 0); + throtl_log(&tg->service_queue, "[%c] new slice start=%lu end=%lu jiffies=%lu", rw == READ ? 'R' : 'W', tg->slice_start[rw], @@ -1025,6 +1033,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, jiffies + tg->td->throtl_slice); } + if (iops_limit != UINT_MAX) + tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0); + if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) && tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) { if (wait) @@ -2046,12 +2057,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg) } if (tg->iops[READ][LIMIT_LOW]) { + tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0); iops = tg->last_io_disp[READ] * HZ / elapsed_time; if (iops >= tg->iops[READ][LIMIT_LOW]) tg->last_low_overflow_time[READ] = now; } if (tg->iops[WRITE][LIMIT_LOW]) { + tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0); iops = tg->last_io_disp[WRITE] * HZ / elapsed_time; if (iops >= tg->iops[WRITE][LIMIT_LOW]) tg->last_low_overflow_time[WRITE] = now; @@ -2170,6 +2183,25 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) } #endif +void blk_throtl_charge_bio_split(struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + struct throtl_grp *parent = blkg_to_tg(blkg); + struct throtl_service_queue *parent_sq; + bool rw = bio_data_dir(bio); + + do { + if (!parent->has_rules[rw]) + break; + + atomic_inc(&parent->io_split_cnt[rw]); + atomic_inc(&parent->last_io_split_cnt[rw]); + + parent_sq = parent->service_queue.parent_sq; + parent = sq_to_tg(parent_sq); + } while (parent); +} + bool blk_throtl_bio(struct bio *bio) { struct request_queue *q = bio->bi_disk->queue; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ab7d7ebcf6dd..61b452272f94 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -296,9 +296,6 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!blk_queue_is_zoned(q)) return -ENOTTY; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) return -EFAULT; @@ -357,9 +354,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, if (!blk_queue_is_zoned(q)) return -ENOTTY; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (!(mode & FMODE_WRITE)) return -EBADF; diff --git a/block/blk.h b/block/blk.h index dfab98465db9..f84c83300f6f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -44,11 +44,7 @@ static inline void __blk_get_queue(struct request_queue *q) kobject_get(&q->kobj); } -static inline bool -is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx) -{ - return hctx->fq->flush_rq == req; -} +bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); @@ -303,11 +299,13 @@ int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern void blk_throtl_register_queue(struct request_queue *q); +extern void blk_throtl_charge_bio_split(struct bio *bio); bool blk_throtl_bio(struct bio *bio); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline void blk_throtl_exit(struct request_queue *q) { } static inline void blk_throtl_register_queue(struct request_queue *q) { } +static inline void blk_throtl_charge_bio_split(struct bio *bio) { } static inline bool blk_throtl_bio(struct bio *bio) { return false; } #endif /* CONFIG_BLK_DEV_THROTTLING */ #ifdef CONFIG_BLK_DEV_THROTTLING_LOW diff --git a/block/bsg.c b/block/bsg.c index 3d78e843a83f..2cbc1fcc8247 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -371,10 +371,13 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case SG_GET_RESERVED_SIZE: case SG_SET_RESERVED_SIZE: case SG_EMULATED_HOST: - case SCSI_IOCTL_SEND_COMMAND: return scsi_cmd_ioctl(bd->queue, NULL, file->f_mode, cmd, uarg); case SG_IO: return bsg_sg_io(bd->queue, file->f_mode, uarg); + case SCSI_IOCTL_SEND_COMMAND: + pr_warn_ratelimited("%s: calling unsupported SCSI_IOCTL_SEND_COMMAND\n", + current->comm); + return -EINVAL; default: return -ENOTTY; } diff --git a/block/elevator.c b/block/elevator.c index 293c5c81397a..2a525863d4e9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -336,6 +336,9 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req, __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector); if (__rq && elv_bio_merge_ok(__rq, bio)) { *req = __rq; + + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_BACK_MERGE; } diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 2b9635d0dcba..e4e90761eab3 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -454,6 +454,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, if (elv_bio_merge_ok(__rq, bio)) { *rq = __rq; + if (blk_discard_mergable(__rq)) + return ELEVATOR_DISCARD_MERGE; return ELEVATOR_FRONT_MERGE; } } |