From 85a953806557dbf25d16e8c132b5b9b100d16496 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Mon, 10 Apr 2023 09:16:52 -0700 Subject: mailbox: Allow direct registration to a channel Support virtual mailbox controllers and clients which are not platform devices or come from the devicetree by allowing them to match client to channel via some other mechanism. Tested-by: Sudeep Holla (pcc) Signed-off-by: Elliot Berman Signed-off-by: Jassi Brar --- include/linux/mailbox_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 65229a45590f..734694912ef7 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -37,6 +37,7 @@ struct mbox_client { void (*tx_done)(struct mbox_client *cl, void *mssg, int r); }; +int mbox_bind_client(struct mbox_chan *chan, struct mbox_client *cl); struct mbox_chan *mbox_request_channel_byname(struct mbox_client *cl, const char *name); struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index); -- cgit v1.2.3 From ff53cd52d9bdbf4074d2bbe9b591729997780bd3 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 18 Mar 2023 17:36:25 +0000 Subject: blk-integrity: register sysfs attributes on struct device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "integrity" kobject only acted as a holder for static sysfs entries. It also was embedded into struct gendisk without managing it, violating assumptions of the driver core. Instead register the sysfs entries directly onto the struct device. Also drop the now unused member integrity_kobj from struct gendisk. Suggested-by: Christoph Hellwig Signed-off-by: Thomas Weißschuh Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20230309-kobj_release-gendisk_integrity-v3-3-ceccb4493c46@weissschuh.net Signed-off-by: Jens Axboe --- block/blk-integrity.c | 55 +++----------------------------------------------- block/blk.h | 10 +-------- block/genhd.c | 12 ++++------- include/linux/blkdev.h | 3 --- 4 files changed, 8 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 1cbfdea88c72..d4e9b4556d14 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -212,31 +212,6 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, return true; } -static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, - char *page) -{ - struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct device *dev = disk_to_dev(disk); - struct device_attribute *dev_attr = - container_of(attr, struct device_attribute, attr); - - return dev_attr->show(dev, dev_attr, page); -} - -static ssize_t integrity_attr_store(struct kobject *kobj, - struct attribute *attr, const char *page, - size_t count) -{ - struct gendisk *disk = container_of(kobj, struct gendisk, integrity_kobj); - struct device *dev = disk_to_dev(disk); - struct device_attribute *dev_attr = - container_of(attr, struct device_attribute, attr); - - if (!dev_attr->store) - return 0; - return dev_attr->store(dev, dev_attr, page, count); -} - static inline struct blk_integrity *dev_to_bi(struct device *dev) { return &dev_to_disk(dev)->queue->integrity; @@ -345,16 +320,10 @@ static struct attribute *integrity_attrs[] = { &dev_attr_device_is_integrity_capable.attr, NULL }; -ATTRIBUTE_GROUPS(integrity); -static const struct sysfs_ops integrity_ops = { - .show = &integrity_attr_show, - .store = &integrity_attr_store, -}; - -static const struct kobj_type integrity_ktype = { - .default_groups = integrity_groups, - .sysfs_ops = &integrity_ops, +const struct attribute_group blk_integrity_attr_group = { + .name = "integrity", + .attrs = integrity_attrs, }; static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) @@ -433,21 +402,3 @@ void blk_integrity_unregister(struct gendisk *disk) memset(bi, 0, sizeof(*bi)); } EXPORT_SYMBOL(blk_integrity_unregister); - -int blk_integrity_add(struct gendisk *disk) -{ - int ret; - - ret = kobject_init_and_add(&disk->integrity_kobj, &integrity_ktype, - &disk_to_dev(disk)->kobj, "%s", "integrity"); - if (!ret) - kobject_uevent(&disk->integrity_kobj, KOBJ_ADD); - return ret; -} - -void blk_integrity_del(struct gendisk *disk) -{ - kobject_uevent(&disk->integrity_kobj, KOBJ_REMOVE); - kobject_del(&disk->integrity_kobj); - kobject_put(&disk->integrity_kobj); -} diff --git a/block/blk.h b/block/blk.h index 564119a76bc5..45547bcf1119 100644 --- a/block/blk.h +++ b/block/blk.h @@ -214,8 +214,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req, bip_next->bip_vec[0].bv_offset); } -int blk_integrity_add(struct gendisk *disk); -void blk_integrity_del(struct gendisk *); +extern const struct attribute_group blk_integrity_attr_group; #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) @@ -248,13 +247,6 @@ static inline bool bio_integrity_endio(struct bio *bio) static inline void bio_integrity_free(struct bio *bio) { } -static inline int blk_integrity_add(struct gendisk *disk) -{ - return 0; -} -static inline void blk_integrity_del(struct gendisk *disk) -{ -} #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); diff --git a/block/genhd.c b/block/genhd.c index 8d56fb5f08b2..9fa4a7cd978c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -478,15 +478,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - ret = blk_integrity_add(disk); - if (ret) - goto out_del_block_link; - disk->part0->bd_holder_dir = kobject_create_and_add("holders", &ddev->kobj); if (!disk->part0->bd_holder_dir) { ret = -ENOMEM; - goto out_del_integrity; + goto out_del_block_link; } disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (!disk->slave_dir) { @@ -549,8 +545,6 @@ out_put_slave_dir: disk->slave_dir = NULL; out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); -out_del_integrity: - blk_integrity_del(disk); out_del_block_link: if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(ddev)); @@ -613,7 +607,6 @@ void del_gendisk(struct gendisk *disk) if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) return; - blk_integrity_del(disk); disk_del_events(disk); mutex_lock(&disk->open_mutex); @@ -1150,6 +1143,9 @@ static const struct attribute_group *disk_attr_groups[] = { &disk_attr_group, #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, +#endif +#ifdef CONFIG_BLK_DEV_INTEGRITY + &blk_integrity_attr_group, #endif NULL }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6ede578dfbc6..aac5c8d7a9ff 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -157,9 +157,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct kobject integrity_kobj; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ #ifdef CONFIG_BLK_DEV_ZONED /* -- cgit v1.2.3 From fd9b8547bc5c34186dc42ea05fb4380d21695374 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 4 May 2023 05:18:55 -0700 Subject: io_uring: Pass whole sqe to commands Currently uring CMD operation relies on having large SQEs, but future operations might want to use normal SQE. The io_uring_cmd currently only saves the payload (cmd) part of the SQE, but, for commands that use normal SQE size, it might be necessary to access the initial SQE fields outside of the payload/cmd block. So, saves the whole SQE other than just the pdu. This changes slightly how the io_uring_cmd works, since the cmd structures and callbacks are not opaque to io_uring anymore. I.e, the callbacks can look at the SQE entries, not only, in the cmd structure. The main advantage is that we don't need to create custom structures for simple commands. Creates io_uring_sqe_cmd() that returns the cmd private data as a null pointer and avoids casting in the callee side. Also, make most of ublk_drv's sqe->cmd priv structure into const, and use io_uring_sqe_cmd() to get the private structure, removing the unwanted cast. (There is one case where the cast is still needed since the header->{len,addr} is updated in the private structure) Suggested-by: Pavel Begunkov Signed-off-by: Breno Leitao Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20230504121856.904491-3-leitao@debian.org Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 26 +++++++++++++------------- drivers/nvme/host/ioctl.c | 2 +- include/linux/io_uring.h | 7 ++++++- io_uring/opdef.c | 2 +- io_uring/uring_cmd.c | 9 +++------ 5 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c73cc57ec547..42f4d7ca962e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1019,7 +1019,7 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) } static void ublk_commit_completion(struct ublk_device *ub, - struct ublksrv_io_cmd *ub_cmd) + const struct ublksrv_io_cmd *ub_cmd) { u32 qid = ub_cmd->q_id, tag = ub_cmd->tag; struct ublk_queue *ubq = ublk_get_queue(ub, qid); @@ -1263,7 +1263,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { - struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd; + const struct ublksrv_io_cmd *ub_cmd = io_uring_sqe_cmd(cmd->sqe); struct ublk_device *ub = cmd->file->private_data; struct ublk_queue *ubq; struct ublk_io *io; @@ -1567,7 +1567,7 @@ static struct ublk_device *ublk_get_device_from_id(int idx) static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; struct gendisk *disk; int ret = -EINVAL; @@ -1630,7 +1630,7 @@ out_unlock: static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; cpumask_var_t cpumask; unsigned long queue; @@ -1681,7 +1681,7 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublksrv_ctrl_dev_info info; struct ublk_device *ub; @@ -1844,7 +1844,7 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n", __func__, cmd->cmd_op, header->dev_id, header->queue_id, @@ -1863,7 +1863,7 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub) static int ublk_ctrl_get_dev_info(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) @@ -1894,7 +1894,7 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) static int ublk_ctrl_get_params(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret; @@ -1925,7 +1925,7 @@ static int ublk_ctrl_get_params(struct ublk_device *ub, static int ublk_ctrl_set_params(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret = -EFAULT; @@ -1983,7 +1983,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) static int ublk_ctrl_start_recovery(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ret = -EINVAL; int i; @@ -2025,7 +2025,7 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, static int ublk_ctrl_end_recovery(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; @@ -2092,7 +2092,7 @@ exit: static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, struct io_uring_cmd *cmd) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe); bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; void __user *argp = (void __user *)(unsigned long)header->addr; char *dev_path = NULL; @@ -2171,7 +2171,7 @@ exit: static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { - struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); struct ublk_device *ub = NULL; int ret = -EINVAL; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d24ea2e05156..81c5c9e38477 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -552,7 +552,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - const struct nvme_uring_cmd *cmd = ioucmd->cmd; + const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe); struct request_queue *q = ns ? ns->queue : ctrl->admin_q; struct nvme_uring_data d; struct nvme_command c; diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 35b9328ca335..3399d979ee1c 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -24,7 +24,7 @@ enum io_uring_cmd_flags { struct io_uring_cmd { struct file *file; - const void *cmd; + const struct io_uring_sqe *sqe; union { /* callback to defer completions to task context */ void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); @@ -66,6 +66,11 @@ static inline void io_uring_free(struct task_struct *tsk) if (tsk->io_uring) __io_uring_free(tsk); } + +static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) +{ + return sqe->cmd; +} #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index cca7c5b55208..3b9c6489b8b6 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -627,7 +627,7 @@ const struct io_cold_def io_cold_defs[] = { }, [IORING_OP_URING_CMD] = { .name = "URING_CMD", - .async_size = uring_cmd_pdu_size(1), + .async_size = 2 * sizeof(struct io_uring_sqe), .prep_async = io_uring_cmd_prep_async, }, [IORING_OP_SEND_ZC] = { diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 5113c9a48583..ed536d7499db 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -69,15 +69,12 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_done); int io_uring_cmd_prep_async(struct io_kiocb *req) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - size_t cmd_size; BUILD_BUG_ON(uring_cmd_pdu_size(0) != 16); BUILD_BUG_ON(uring_cmd_pdu_size(1) != 80); - cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128); - - memcpy(req->async_data, ioucmd->cmd, cmd_size); - ioucmd->cmd = req->async_data; + memcpy(req->async_data, ioucmd->sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = req->async_data; return 0; } @@ -103,7 +100,7 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = ctx->user_bufs[index]; io_req_set_rsrc_node(req, ctx, 0); } - ioucmd->cmd = sqe->cmd; + ioucmd->sqe = sqe; ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); return 0; } -- cgit v1.2.3 From 162bd18eb55adf464a0fa2b4144b8d61c75ff7c2 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Sun, 7 May 2023 16:57:43 +0300 Subject: linux/dim: Do nothing if no time delta between samples Add return value for dim_calc_stats. This is an indication for the caller if curr_stats was assigned by the function. Avoid using curr_stats uninitialized over {rdma/net}_dim, when no time delta between samples. Coverity reported this potential use of an uninitialized variable. Fixes: 4c4dbb4a7363 ("net/mlx5e: Move dynamic interrupt coalescing code to include/linux") Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing") Signed-off-by: Roy Novich Reviewed-by: Aya Levin Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: Leon Romanovsky Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/20230507135743.138993-1-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/dim.h | 3 ++- lib/dim/dim.c | 5 +++-- lib/dim/net_dim.c | 3 ++- lib/dim/rdma_dim.c | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dim.h b/include/linux/dim.h index 6c5733981563..f343bc9aa2ec 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -236,8 +236,9 @@ void dim_park_tired(struct dim *dim); * * Calculate the delta between two samples (in data rates). * Takes into consideration counter wrap-around. + * Returned boolean indicates whether curr_stats are reliable. */ -void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats); /** diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 38045d6d0538..e89aaf07bde5 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -54,7 +54,7 @@ void dim_park_tired(struct dim *dim) } EXPORT_SYMBOL(dim_park_tired); -void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ @@ -66,7 +66,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, start->comp_ctr); if (!delta_us) - return; + return false; curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us); curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us); @@ -79,5 +79,6 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, else curr_stats->cpe_ratio = 0; + return true; } EXPORT_SYMBOL(dim_calc_stats); diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 53f6b9c6e936..4e32f7aaac86 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -227,7 +227,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) dim->start_sample.event_ctr); if (nevents < DIM_NEVENTS) break; - dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats); + if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) + break; if (net_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; schedule_work(&dim->work); diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c index 15462d54758d..88f779486707 100644 --- a/lib/dim/rdma_dim.c +++ b/lib/dim/rdma_dim.c @@ -88,7 +88,8 @@ void rdma_dim(struct dim *dim, u64 completions) nevents = curr_sample->event_ctr - dim->start_sample.event_ctr; if (nevents < DIM_NEVENTS) break; - dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats); + if (!dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats)) + break; if (rdma_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; schedule_work(&dim->work); -- cgit v1.2.3 From 4063384ef762cc5946fc7a3f89879e76c6ec51e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 May 2023 13:18:57 +0000 Subject: net: add vlan_get_protocol_and_depth() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before blamed commit, pskb_may_pull() was used instead of skb_header_pointer() in __vlan_get_protocol() and friends. Few callers depended on skb->head being populated with MAC header, syzbot caught one of them (skb_mac_gso_segment()) Add vlan_get_protocol_and_depth() to make the intent clearer and use it where sensible. This is a more generic fix than commit e9d3f80935b6 ("net/af_packet: make sure to pull mac header") which was dealing with a similar issue. kernel BUG at include/linux/skbuff.h:2655 ! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 1441 Comm: syz-executor199 Not tainted 6.1.24-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/14/2023 RIP: 0010:__skb_pull include/linux/skbuff.h:2655 [inline] RIP: 0010:skb_mac_gso_segment+0x68f/0x6a0 net/core/gro.c:136 Code: fd 48 8b 5c 24 10 44 89 6b 70 48 c7 c7 c0 ae 0d 86 44 89 e6 e8 a1 91 d0 00 48 c7 c7 00 af 0d 86 48 89 de 31 d2 e8 d1 4a e9 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 RSP: 0018:ffffc90001bd7520 EFLAGS: 00010286 RAX: ffffffff8469736a RBX: ffff88810f31dac0 RCX: ffff888115a18b00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc90001bd75e8 R08: ffffffff84697183 R09: fffff5200037adf9 R10: 0000000000000000 R11: dffffc0000000001 R12: 0000000000000012 R13: 000000000000fee5 R14: 0000000000005865 R15: 000000000000fed7 FS: 000055555633f300(0000) GS:ffff8881f6a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000000 CR3: 0000000116fea000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] __skb_gso_segment+0x32d/0x4c0 net/core/dev.c:3419 [] skb_gso_segment include/linux/netdevice.h:4819 [inline] [] validate_xmit_skb+0x3aa/0xee0 net/core/dev.c:3725 [] __dev_queue_xmit+0x1332/0x3300 net/core/dev.c:4313 [] dev_queue_xmit+0x17/0x20 include/linux/netdevice.h:3029 [] packet_snd net/packet/af_packet.c:3111 [inline] [] packet_sendmsg+0x49d2/0x6470 net/packet/af_packet.c:3142 [] sock_sendmsg_nosec net/socket.c:716 [inline] [] sock_sendmsg net/socket.c:736 [inline] [] __sys_sendto+0x472/0x5f0 net/socket.c:2139 [] __do_sys_sendto net/socket.c:2151 [inline] [] __se_sys_sendto net/socket.c:2147 [inline] [] __x64_sys_sendto+0xe5/0x100 net/socket.c:2147 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x2f/0x50 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 469aceddfa3e ("vlan: consolidate VLAN parsing code and limit max parsing depth") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Toke Høiland-Jørgensen Cc: Willem de Bruijn Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/tap.c | 4 ++-- include/linux/if_vlan.h | 17 +++++++++++++++++ net/bridge/br_forward.c | 2 +- net/core/dev.c | 2 +- net/packet/af_packet.c | 6 ++---- 5 files changed, 23 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ce993cc75bf3..d30d730ed5a7 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -742,7 +742,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); /* copy skb_ubuf_info for callback when skb has no error */ @@ -1197,7 +1197,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); rcu_read_lock(); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 0f40f379d75c..6ba71957851e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -637,6 +637,23 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* This version of __vlan_get_protocol() also pulls mac header in skb->head */ +static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb, + __be16 type, int *depth) +{ + int maclen; + + type = __vlan_get_protocol(skb, type, &maclen); + + if (type) { + if (!pskb_may_pull(skb, maclen)) + type = 0; + else if (depth) + *depth = maclen; + } + return type; +} + /* A getter for the SKB protocol field which will handle VLAN tags consistently * whether VLAN acceleration is enabled or not. */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 57744704ff69..84d6dd5e5b1a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -42,7 +42,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb eth_type_vlan(skb->protocol)) { int depth; - if (!__vlan_get_protocol(skb, skb->protocol, &depth)) + if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth)) goto drop; skb_set_network_header(skb, depth); diff --git a/net/core/dev.c b/net/core/dev.c index 735096d42c1d..b3c13e041935 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3335,7 +3335,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - return __vlan_get_protocol(skb, type, depth); + return vlan_get_protocol_and_depth(skb, type, depth); } /* openvswitch calls this on rx path, so we need a different check. diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 640d94e34635..94c6a1ffa459 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1934,10 +1934,8 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { - if (pskb_may_pull(skb, depth)) - skb_set_network_header(skb, depth); - } + vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); skb_probe_transport_header(skb); } -- cgit v1.2.3